Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 83cb43bcf1 | |||
| b310206f1f |
65
CLAUDE.md
65
CLAUDE.md
@@ -295,60 +295,61 @@ Development: ~/projects/nixosbox → git commit → git push
|
||||
Deployment: git pull → /var/lib/cm-dashboard/nixos-config → rebuild
|
||||
```
|
||||
|
||||
## NixOS Configuration Updates
|
||||
## Automated Binary Release System
|
||||
|
||||
When code changes are made to cm-dashboard, the NixOS configuration at `~/projects/nixosbox` must be updated to deploy the changes.
|
||||
**IMPLEMENTED:** cm-dashboard now uses automated binary releases instead of source builds.
|
||||
|
||||
### Update Process
|
||||
### Release Workflow
|
||||
|
||||
1. **Get Latest Commit Hash**
|
||||
1. **Automated Release Creation**
|
||||
- Gitea Actions workflow builds static binaries on tag push
|
||||
- Creates release with `cm-dashboard-linux-x86_64.tar.gz` tarball
|
||||
- No manual intervention required for binary generation
|
||||
|
||||
2. **Creating New Releases**
|
||||
```bash
|
||||
git log -1 --format="%H"
|
||||
cd ~/projects/cm-dashboard
|
||||
git tag v0.1.X
|
||||
git push origin v0.1.X
|
||||
```
|
||||
|
||||
2. **Update NixOS Configuration**
|
||||
This automatically:
|
||||
- Builds static binaries with `RUSTFLAGS="-C target-feature=+crt-static"`
|
||||
- Creates a GitHub-style release containing the tarball
|
||||
- Uploads binaries via Gitea API
|
||||
|
||||
3. **NixOS Configuration Updates**
|
||||
Edit `~/projects/nixosbox/hosts/common/cm-dashboard.nix`:
|
||||
|
||||
```nix
|
||||
src = pkgs.fetchgit {
|
||||
url = "https://gitea.cmtec.se/cm/cm-dashboard.git";
|
||||
rev = "NEW_COMMIT_HASH_HERE";
|
||||
sha256 = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="; # Placeholder
|
||||
version = "v0.1.X";
|
||||
src = pkgs.fetchurl {
|
||||
url = "https://gitea.cmtec.se/cm/cm-dashboard/releases/download/${version}/cm-dashboard-linux-x86_64.tar.gz";
|
||||
sha256 = "sha256-NEW_HASH_HERE";
|
||||
};
|
||||
```
|
||||
|
||||
3. **Get Correct Source Hash**
|
||||
Build with a placeholder hash so that Nix reports the actual hash in its mismatch error:
|
||||
|
||||
4. **Get Release Hash**
|
||||
```bash
|
||||
cd ~/projects/nixosbox
|
||||
nix-build --no-out-link -E 'with import <nixpkgs> {}; fetchgit {
|
||||
url = "https://gitea.cmtec.se/cm/cm-dashboard.git";
|
||||
rev = "NEW_COMMIT_HASH";
|
||||
nix-build --no-out-link -E 'with import <nixpkgs> {}; fetchurl {
|
||||
url = "https://gitea.cmtec.se/cm/cm-dashboard/releases/download/v0.1.X/cm-dashboard-linux-x86_64.tar.gz";
|
||||
sha256 = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";
|
||||
}' 2>&1 | grep "got:"
|
||||
```
|
||||
|
||||
Example output:
|
||||
|
||||
```
|
||||
error: hash mismatch in fixed-output derivation '/nix/store/...':
|
||||
specified: sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=
|
||||
got: sha256-x8crxNusOUYRrkP9mYEOG+Ga3JCPIdJLkEAc5P1ZxdQ=
|
||||
```
|
||||
|
||||
4. **Update Configuration with Correct Hash**
|
||||
Replace the placeholder with the hash from the error message (the "got:" line).
|
||||
|
||||
5. **Commit NixOS Configuration**
|
||||
|
||||
5. **Commit and Deploy**
|
||||
```bash
|
||||
cd ~/projects/nixosbox
|
||||
git add hosts/common/cm-dashboard.nix
|
||||
git commit -m "Update cm-dashboard to latest version (SHORT_HASH)"
|
||||
git commit -m "Update cm-dashboard to v0.1.X with static binaries"
|
||||
git push
|
||||
```
|
||||
|
||||
6. **Rebuild System**
|
||||
The user handles the system rebuild step - this cannot be automated.
|
||||
### Benefits
|
||||
|
||||
- **No compilation overhead** on each host
|
||||
- **Consistent static binaries** across all hosts
|
||||
- **Faster deployments** - download vs compile
|
||||
- **No library dependency issues** - static linking
|
||||
- **Automated pipeline** - tag push triggers everything
|
||||
|
||||
@@ -8,6 +8,7 @@ pub mod disk;
|
||||
pub mod error;
|
||||
pub mod memory;
|
||||
pub mod nixos;
|
||||
pub mod smart;
|
||||
pub mod systemd;
|
||||
|
||||
pub use error::CollectorError;
|
||||
|
||||
191
agent/src/collectors/smart.rs
Normal file
191
agent/src/collectors/smart.rs
Normal file
@@ -0,0 +1,191 @@
|
||||
use async_trait::async_trait;
|
||||
use cm_dashboard_shared::{Metric, MetricStatus, MetricValue};
|
||||
use std::process::Stdio;
|
||||
use tokio::process::Command;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use super::{Collector, CollectorError};
|
||||
|
||||
/// Collector that turns `smartctl` reports for local disks into health and
/// temperature metrics.
pub struct SmartCollector {
    /// Host name copied into the `hostname` field of every emitted metric.
    hostname: String,
}
|
||||
|
||||
impl SmartCollector {
|
||||
pub fn new(hostname: String) -> Self {
|
||||
Self { hostname }
|
||||
}
|
||||
|
||||
/// Get list of storage devices to monitor
|
||||
async fn get_devices(&self) -> Result<Vec<String>, CollectorError> {
|
||||
let output = Command::new("lsblk")
|
||||
.args(["-d", "-n", "-o", "NAME,TYPE"])
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::null())
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| CollectorError::Collection(e.to_string()))?;
|
||||
|
||||
if !output.status.success() {
|
||||
return Ok(Vec::new()); // Return empty if lsblk fails
|
||||
}
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let mut devices = Vec::new();
|
||||
|
||||
for line in stdout.lines() {
|
||||
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||
if parts.len() >= 2 && parts[1] == "disk" {
|
||||
let device_name = parts[0];
|
||||
if device_name.starts_with("nvme") || device_name.starts_with("sd") {
|
||||
devices.push(format!("/dev/{}", device_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(devices)
|
||||
}
|
||||
|
||||
/// Collect SMART data for a single device
|
||||
async fn collect_device_smart(&self, device: &str) -> Result<Vec<Metric>, CollectorError> {
|
||||
debug!("Collecting SMART data for device: {}", device);
|
||||
|
||||
let output = Command::new("sudo")
|
||||
.args(["smartctl", "-H", "-A", device]) // Health and attributes only
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::null())
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| CollectorError::Collection(e.to_string()))?;
|
||||
|
||||
if !output.status.success() {
|
||||
warn!("smartctl failed for device: {}", device);
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
self.parse_smart_output(device, &stdout)
|
||||
}
|
||||
|
||||
/// Parse smartctl output and create metrics
|
||||
fn parse_smart_output(&self, device: &str, output: &str) -> Result<Vec<Metric>, CollectorError> {
|
||||
let mut metrics = Vec::new();
|
||||
let device_name = device.trim_start_matches("/dev/");
|
||||
|
||||
let mut health_ok = true;
|
||||
let mut temperature: Option<f64> = None;
|
||||
|
||||
for line in output.lines() {
|
||||
let line = line.trim();
|
||||
|
||||
// Parse health status
|
||||
if line.contains("SMART overall-health self-assessment") {
|
||||
if line.contains("FAILED") {
|
||||
health_ok = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Parse temperature from various formats
|
||||
if (line.contains("Temperature") || line.contains("Airflow_Temperature")) && temperature.is_none() {
|
||||
if let Some(temp) = self.extract_temperature(line) {
|
||||
temperature = Some(temp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create health metric
|
||||
let health_status = if health_ok {
|
||||
MetricStatus::Ok
|
||||
} else {
|
||||
MetricStatus::Critical
|
||||
};
|
||||
|
||||
metrics.push(Metric {
|
||||
hostname: self.hostname.clone(),
|
||||
metric_name: format!("smart_health_{}", device_name),
|
||||
metric_value: MetricValue::String(if health_ok { "PASSED".to_string() } else { "FAILED".to_string() }),
|
||||
status: health_status,
|
||||
timestamp: chrono::Utc::now(),
|
||||
tags: vec![
|
||||
("device".to_string(), device_name.to_string()),
|
||||
("type".to_string(), "health".to_string()),
|
||||
],
|
||||
});
|
||||
|
||||
// Create temperature metric if available
|
||||
if let Some(temp) = temperature {
|
||||
let temp_status = if temp >= 70.0 {
|
||||
MetricStatus::Critical
|
||||
} else if temp >= 60.0 {
|
||||
MetricStatus::Warning
|
||||
} else {
|
||||
MetricStatus::Ok
|
||||
};
|
||||
|
||||
metrics.push(Metric {
|
||||
hostname: self.hostname.clone(),
|
||||
metric_name: format!("smart_temperature_{}", device_name),
|
||||
metric_value: MetricValue::Float(temp),
|
||||
status: temp_status,
|
||||
timestamp: chrono::Utc::now(),
|
||||
tags: vec![
|
||||
("device".to_string(), device_name.to_string()),
|
||||
("type".to_string(), "temperature".to_string()),
|
||||
("unit".to_string(), "celsius".to_string()),
|
||||
],
|
||||
});
|
||||
}
|
||||
|
||||
debug!("Collected {} SMART metrics for {}", metrics.len(), device);
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
/// Extract temperature value from smartctl output line
|
||||
fn extract_temperature(&self, line: &str) -> Option<f64> {
|
||||
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||
|
||||
for (i, part) in parts.iter().enumerate() {
|
||||
if let Ok(temp) = part.parse::<f64>() {
|
||||
// Check if this looks like a temperature value (reasonable range)
|
||||
if temp > 0.0 && temp < 150.0 {
|
||||
// Check context around the number
|
||||
if i + 1 < parts.len() {
|
||||
let next = parts[i + 1].to_lowercase();
|
||||
if next.contains("celsius") || next.contains("°c") || next == "c" {
|
||||
return Some(temp);
|
||||
}
|
||||
}
|
||||
// For SMART attribute lines, temperature is often the 10th column
|
||||
if parts.len() >= 10 && (line.contains("Temperature") || line.contains("Airflow_Temperature")) {
|
||||
return Some(temp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Collector for SmartCollector {
|
||||
async fn collect(&mut self) -> Result<Vec<Metric>, CollectorError> {
|
||||
debug!("Starting SMART data collection");
|
||||
|
||||
let devices = self.get_devices().await?;
|
||||
let mut all_metrics = Vec::new();
|
||||
|
||||
for device in devices {
|
||||
match self.collect_device_smart(&device).await {
|
||||
Ok(mut metrics) => {
|
||||
all_metrics.append(&mut metrics);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Failed to collect SMART data for {}: {}", device, e);
|
||||
// Continue with other devices
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Collected {} total SMART metrics", all_metrics.len());
|
||||
Ok(all_metrics)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user