diff --git a/Cargo.lock b/Cargo.lock index 326d4ce..99d31c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "cm-dashboard" -version = "0.1.151" +version = "0.1.153" dependencies = [ "anyhow", "chrono", @@ -301,7 +301,7 @@ dependencies = [ [[package]] name = "cm-dashboard-agent" -version = "0.1.151" +version = "0.1.153" dependencies = [ "anyhow", "async-trait", @@ -324,7 +324,7 @@ dependencies = [ [[package]] name = "cm-dashboard-shared" -version = "0.1.151" +version = "0.1.153" dependencies = [ "chrono", "serde", diff --git a/agent/Cargo.toml b/agent/Cargo.toml index a4c9a84..8d11b6f 100644 --- a/agent/Cargo.toml +++ b/agent/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard-agent" -version = "0.1.152" +version = "0.1.153" edition = "2021" [dependencies] diff --git a/agent/src/collectors/disk.rs b/agent/src/collectors/disk.rs index 9358e2f..f623fa0 100644 --- a/agent/src/collectors/disk.rs +++ b/agent/src/collectors/disk.rs @@ -76,11 +76,17 @@ impl DiskCollector { let mount_devices = self.get_mount_devices().await?; // Step 2: Get filesystem usage for each mount point using df - let filesystem_usage = self.get_filesystem_usage(&mount_devices).map_err(|e| CollectorError::Parse { + let mut filesystem_usage = self.get_filesystem_usage(&mount_devices).map_err(|e| CollectorError::Parse { value: "filesystem usage".to_string(), error: format!("Failed to get filesystem usage: {}", e), })?; + // Step 2.5: Add MergerFS mount points that weren't in lsblk output + self.add_mergerfs_filesystem_usage(&mut filesystem_usage).map_err(|e| CollectorError::Parse { + value: "mergerfs filesystem usage".to_string(), + error: format!("Failed to get mergerfs filesystem usage: {}", e), + })?; + // Step 3: Detect MergerFS pools let mergerfs_pools = self.detect_mergerfs_pools(&filesystem_usage).map_err(|e| CollectorError::Parse { value: "mergerfs pools".to_string(), @@ -156,6 +162,30 @@ impl DiskCollector { Ok(filesystem_usage) } + /// Add filesystem usage for MergerFS mount points that aren't in lsblk + fn add_mergerfs_filesystem_usage(&self, filesystem_usage: &mut HashMap) -> anyhow::Result<()> { + let mounts_content = std::fs::read_to_string("/proc/mounts") + .map_err(|e| anyhow::anyhow!("Failed to read /proc/mounts: {}", e))?; + + for line in mounts_content.lines() { + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() >= 3 && parts[2] == "fuse.mergerfs" { + let mount_point = parts[1].to_string(); + + // Only add if we don't already have usage data for this mount point + if !filesystem_usage.contains_key(&mount_point) { + if let Ok((total, used)) = self.get_filesystem_info(&mount_point) { + debug!("Added MergerFS filesystem usage for {}: {}GB total, {}GB used", + mount_point, total as f32 / (1024.0 * 1024.0 * 1024.0), used as f32 / (1024.0 * 1024.0 * 1024.0)); + filesystem_usage.insert(mount_point, (total, used)); + } + } + } + } + + Ok(()) + } + /// Get filesystem info for a single mount point fn get_filesystem_info(&self, mount_point: &str) -> Result<(u64, u64), CollectorError> { let output = Command::new("df") @@ -511,8 +541,8 @@ impl DiskCollector { /// Populate pools data into AgentData fn populate_pools_data(&self, mergerfs_pools: &[MergerfsPool], smart_data: &HashMap, agent_data: &mut AgentData) -> Result<(), CollectorError> { for pool in mergerfs_pools { - // Calculate pool health based on member drive health - let (pool_health, data_drive_data, parity_drive_data) = self.calculate_pool_health(pool, smart_data); + // Calculate pool health and statuses based on member drive health + let (pool_health, health_status, usage_status, data_drive_data, parity_drive_data) = self.calculate_pool_health(pool, smart_data); let pool_data = PoolData { name: pool.name.clone(), @@ -526,6 +556,8 @@ impl DiskCollector { total_gb: pool.total_bytes as f32 / (1024.0 * 1024.0 * 1024.0), data_drives: data_drive_data, parity_drives: parity_drive_data, + health_status, + usage_status, }; agent_data.system.storage.pools.push(pool_data); @@ -535,7 +567,7 @@ impl DiskCollector { } /// Calculate pool health based on member drive status - fn calculate_pool_health(&self, pool: &MergerfsPool, smart_data: &HashMap) -> (String, Vec, Vec) { + fn calculate_pool_health(&self, pool: &MergerfsPool, smart_data: &HashMap) -> (String, cm_dashboard_shared::Status, cm_dashboard_shared::Status, Vec, Vec) { let mut failed_data = 0; let mut failed_parity = 0; @@ -543,16 +575,23 @@ impl DiskCollector { let data_drive_data: Vec = pool.data_drives.iter().map(|d| { let smart = smart_data.get(&d.name); let health = smart.map(|s| s.health.clone()).unwrap_or_else(|| "UNKNOWN".to_string()); + let temperature = smart.and_then(|s| s.temperature_celsius).or(d.temperature_celsius); if health == "FAILED" { failed_data += 1; } + // Calculate drive statuses using config thresholds + let health_status = self.calculate_health_status(&health); + let temperature_status = temperature.map(|t| self.temperature_thresholds.evaluate(t)).unwrap_or(cm_dashboard_shared::Status::Unknown); + cm_dashboard_shared::PoolDriveData { name: d.name.clone(), - temperature_celsius: smart.and_then(|s| s.temperature_celsius).or(d.temperature_celsius), + temperature_celsius: temperature, health, wear_percent: smart.and_then(|s| s.wear_percent), + health_status, + temperature_status, } }).collect(); @@ -560,27 +599,47 @@ impl DiskCollector { let parity_drive_data: Vec = pool.parity_drives.iter().map(|d| { let smart = smart_data.get(&d.name); let health = smart.map(|s| s.health.clone()).unwrap_or_else(|| "UNKNOWN".to_string()); + let temperature = smart.and_then(|s| s.temperature_celsius).or(d.temperature_celsius); if health == "FAILED" { failed_parity += 1; } + // Calculate drive statuses using config thresholds + let health_status = self.calculate_health_status(&health); + let temperature_status = temperature.map(|t| self.temperature_thresholds.evaluate(t)).unwrap_or(cm_dashboard_shared::Status::Unknown); + cm_dashboard_shared::PoolDriveData { name: d.name.clone(), - temperature_celsius: smart.and_then(|s| s.temperature_celsius).or(d.temperature_celsius), + temperature_celsius: temperature, health, wear_percent: smart.and_then(|s| s.wear_percent), + health_status, + temperature_status, } }).collect(); - // Calculate overall pool health - let pool_health = match (failed_data, failed_parity) { - (0, 0) => "healthy".to_string(), - (1, 0) | (0, 1) => "degraded".to_string(), // One failure is degraded but recoverable - _ => "critical".to_string(), // Multiple failures are critical + // Calculate overall pool health string and status + let (pool_health, health_status) = match (failed_data, failed_parity) { + (0, 0) => ("healthy".to_string(), cm_dashboard_shared::Status::Ok), + (1, 0) | (0, 1) => ("degraded".to_string(), cm_dashboard_shared::Status::Warning), + _ => ("critical".to_string(), cm_dashboard_shared::Status::Critical), }; - (pool_health, data_drive_data, parity_drive_data) + // Calculate pool usage status using config thresholds + let usage_percent = if pool.total_bytes > 0 { + (pool.used_bytes as f32 / pool.total_bytes as f32) * 100.0 + } else { 0.0 }; + + let usage_status = if usage_percent >= self.config.usage_critical_percent { + cm_dashboard_shared::Status::Critical + } else if usage_percent >= self.config.usage_warning_percent { + cm_dashboard_shared::Status::Warning + } else { + cm_dashboard_shared::Status::Ok + }; + + (pool_health, health_status, usage_status, data_drive_data, parity_drive_data) } /// Calculate filesystem usage status diff --git a/dashboard/Cargo.toml b/dashboard/Cargo.toml index c9f9a89..c49fbb9 100644 --- a/dashboard/Cargo.toml +++ b/dashboard/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard" -version = "0.1.152" +version = "0.1.153" edition = "2021" [dependencies] diff --git a/dashboard/src/ui/widgets/system.rs b/dashboard/src/ui/widgets/system.rs index c0da17c..13b3b4d 100644 --- a/dashboard/src/ui/widgets/system.rs +++ b/dashboard/src/ui/widgets/system.rs @@ -273,6 +273,17 @@ impl SystemWidget { // Convert pools (MergerFS, RAID, etc.) for pool in &agent_data.system.storage.pools { + // Use agent-calculated status (combined health and usage status) + let pool_status = if pool.health_status == Status::Critical || pool.usage_status == Status::Critical { + Status::Critical + } else if pool.health_status == Status::Warning || pool.usage_status == Status::Warning { + Status::Warning + } else if pool.health_status == Status::Ok && pool.usage_status == Status::Ok { + Status::Ok + } else { + Status::Unknown + }; + let mut storage_pool = StoragePool { name: pool.name.clone(), mount_point: pool.mount.clone(), @@ -284,27 +295,49 @@ impl SystemWidget { usage_percent: Some(pool.usage_percent), used_gb: Some(pool.used_gb), total_gb: Some(pool.total_gb), - status: Status::Ok, // TODO: map pool health to status + status: pool_status, }; - // Add data drives + // Add data drives - use agent-calculated status for drive in &pool.data_drives { + // Use combined health and temperature status + let drive_status = if drive.health_status == Status::Critical || drive.temperature_status == Status::Critical { + Status::Critical + } else if drive.health_status == Status::Warning || drive.temperature_status == Status::Warning { + Status::Warning + } else if drive.health_status == Status::Ok && drive.temperature_status == Status::Ok { + Status::Ok + } else { + Status::Unknown + }; + let storage_drive = StorageDrive { name: drive.name.clone(), temperature: drive.temperature_celsius, wear_percent: drive.wear_percent, - status: Status::Ok, // TODO: map drive health to status + status: drive_status, }; storage_pool.data_drives.push(storage_drive); } - // Add parity drives + // Add parity drives - use agent-calculated status for drive in &pool.parity_drives { + // Use combined health and temperature status + let drive_status = if drive.health_status == Status::Critical || drive.temperature_status == Status::Critical { + Status::Critical + } else if drive.health_status == Status::Warning || drive.temperature_status == Status::Warning { + Status::Warning + } else if drive.health_status == Status::Ok && drive.temperature_status == Status::Ok { + Status::Ok + } else { + Status::Unknown + }; + let storage_drive = StorageDrive { name: drive.name.clone(), temperature: drive.temperature_celsius, wear_percent: drive.wear_percent, - status: Status::Ok, // TODO: map drive health to status + status: drive_status, }; storage_pool.parity_drives.push(storage_drive); } @@ -403,7 +436,8 @@ impl SystemWidget { // Data Disks section if !pool.data_drives.is_empty() { lines.push(Line::from(vec![ - Span::styled(" ├─ Data Disks:", Typography::secondary()) + Span::styled(" ├─ ", Typography::tree()), + Span::styled("Data Disks:", Typography::secondary()) ])); for (i, drive) in pool.data_drives.iter().enumerate() { let is_last = i == pool.data_drives.len() - 1; @@ -414,7 +448,6 @@ impl SystemWidget { // Parity section if !pool.parity_drives.is_empty() { - let parity_symbol = " ├─ Parity: "; for drive in &pool.parity_drives { let mut drive_details = Vec::new(); if let Some(temp) = drive.temperature { @@ -431,7 +464,8 @@ impl SystemWidget { }; let mut parity_spans = vec![ - Span::styled(parity_symbol, Typography::tree()), + Span::styled(" ├─ ", Typography::tree()), + Span::styled("Parity: ", Typography::secondary()), ]; parity_spans.extend(StatusIcons::create_status_spans(drive.status.clone(), &drive_text)); lines.push(Line::from(parity_spans)); diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 8f175ae..5fce42e 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard-shared" -version = "0.1.152" +version = "0.1.153" edition = "2021" [dependencies] diff --git a/shared/src/agent_data.rs b/shared/src/agent_data.rs index dd0f119..d9d96bf 100644 --- a/shared/src/agent_data.rs +++ b/shared/src/agent_data.rs @@ -96,6 +96,8 @@ pub struct PoolData { pub total_gb: f32, pub data_drives: Vec, pub parity_drives: Vec, + pub health_status: Status, + pub usage_status: Status, } /// Drive in a storage pool @@ -105,6 +107,8 @@ pub struct PoolDriveData { pub temperature_celsius: Option, pub wear_percent: Option, pub health: String, + pub health_status: Status, + pub temperature_status: Status, } /// Service monitoring data