use anyhow::Result; use async_trait::async_trait; use cm_dashboard_shared::{Metric, MetricValue, Status}; use std::collections::HashMap; use std::process::Command; use std::time::Instant; use tracing::debug; use super::{Collector, CollectorError, PerformanceMetrics}; /// Information about a mounted disk #[derive(Debug, Clone)] struct MountedDisk { device: String, // e.g., "/dev/nvme0n1p1" physical_device: String, // e.g., "/dev/nvme0n1" mount_point: String, // e.g., "/" filesystem: String, // e.g., "ext4" size: String, // e.g., "120G" used: String, // e.g., "45G" available: String, // e.g., "75G" usage_percent: f32, // e.g., 38.5 } /// Disk usage collector for monitoring filesystem sizes pub struct DiskCollector { // Immutable collector for caching compatibility } impl DiskCollector { pub fn new() -> Self { Self {} } /// Get directory size using du command (efficient for single directory) fn get_directory_size(&self, path: &str) -> Result { let output = Command::new("du") .arg("-s") .arg("--block-size=1") .arg(path) .output()?; // du returns success even with permission denied warnings in stderr // We only care if the command completely failed or produced no stdout let output_str = String::from_utf8(output.stdout)?; if output_str.trim().is_empty() { return Err(anyhow::anyhow!("du command produced no output for {}", path)); } let size_str = output_str .split_whitespace() .next() .ok_or_else(|| anyhow::anyhow!("Failed to parse du output"))?; let size_bytes = size_str.parse::()?; Ok(size_bytes) } /// Get filesystem info using df command fn get_filesystem_info(&self, path: &str) -> Result<(u64, u64)> { let output = Command::new("df") .arg("--block-size=1") .arg(path) .output()?; if !output.status.success() { return Err(anyhow::anyhow!("df command failed for {}", path)); } let output_str = String::from_utf8(output.stdout)?; let lines: Vec<&str> = output_str.lines().collect(); if lines.len() < 2 { return Err(anyhow::anyhow!("Unexpected df output format")); } let fields: Vec<&str> = lines[1].split_whitespace().collect(); if fields.len() < 4 { return Err(anyhow::anyhow!("Unexpected df fields count")); } let total_bytes = fields[1].parse::()?; let used_bytes = fields[2].parse::()?; Ok((total_bytes, used_bytes)) } /// Get root filesystem disk usage fn get_root_filesystem_usage(&self) -> Result<(u64, u64, f32)> { let (total_bytes, used_bytes) = self.get_filesystem_info("/")?; let usage_percent = (used_bytes as f64 / total_bytes as f64) * 100.0; Ok((total_bytes, used_bytes, usage_percent as f32)) } /// Get all mounted disks with their mount points and underlying devices fn get_mounted_disks(&self) -> Result> { let output = Command::new("df") .arg("-h") .arg("--output=source,target,fstype,size,used,avail,pcent") .output()?; if !output.status.success() { return Err(anyhow::anyhow!("df command failed")); } let output_str = String::from_utf8(output.stdout)?; let mut mounted_disks = Vec::new(); for line in output_str.lines().skip(1) { // Skip header let fields: Vec<&str> = line.split_whitespace().collect(); if fields.len() >= 7 { let source = fields[0]; let target = fields[1]; let fstype = fields[2]; let size = fields[3]; let used = fields[4]; let avail = fields[5]; let pcent_str = fields[6]; // Skip special filesystems if source.starts_with("/dev/") && !fstype.contains("tmpfs") && !fstype.contains("devtmpfs") && !target.starts_with("/proc") && !target.starts_with("/sys") && !target.starts_with("/dev") { // Extract percentage let usage_percent = pcent_str .trim_end_matches('%') .parse::() .unwrap_or(0.0); // Get underlying physical device let physical_device = self.get_physical_device(source)?; mounted_disks.push(MountedDisk { device: source.to_string(), physical_device, mount_point: target.to_string(), filesystem: fstype.to_string(), size: size.to_string(), used: used.to_string(), available: avail.to_string(), usage_percent, }); } } } Ok(mounted_disks) } /// Get the physical device for a given device (resolves symlinks, gets parent device) fn get_physical_device(&self, device: &str) -> Result { // For NVMe: /dev/nvme0n1p1 -> /dev/nvme0n1 if device.contains("nvme") && device.contains("p") { if let Some(base) = device.split('p').next() { return Ok(base.to_string()); } } // For SATA: /dev/sda1 -> /dev/sda if device.starts_with("/dev/sd") && device.len() > 8 { return Ok(device[..8].to_string()); // Keep /dev/sdX } // For VirtIO: /dev/vda1 -> /dev/vda if device.starts_with("/dev/vd") && device.len() > 8 { return Ok(device[..8].to_string()); } // If no partition detected, return as-is Ok(device.to_string()) } /// Get SMART health for a specific physical device fn get_smart_health(&self, device: &str) -> (String, f32) { if let Ok(output) = Command::new("smartctl") .arg("-H") .arg(device) .output() { if output.status.success() { let output_str = String::from_utf8_lossy(&output.stdout); let health_status = if output_str.contains("PASSED") { "PASSED" } else if output_str.contains("FAILED") { "FAILED" } else { "UNKNOWN" }; // Try to get temperature let temperature = if let Ok(temp_output) = Command::new("smartctl") .arg("-A") .arg(device) .output() { let temp_str = String::from_utf8_lossy(&temp_output.stdout); // Look for temperature in SMART attributes for line in temp_str.lines() { if line.contains("Temperature") && line.contains("Celsius") { if let Some(temp_part) = line.split_whitespace().nth(9) { if let Ok(temp) = temp_part.parse::() { return (health_status.to_string(), temp); } } } } 0.0 } else { 0.0 }; return (health_status.to_string(), temperature); } } ("UNKNOWN".to_string(), 0.0) } /// Calculate status based on usage percentage fn calculate_usage_status(&self, used_bytes: u64, total_bytes: u64) -> Status { if total_bytes == 0 { return Status::Unknown; } let usage_percent = (used_bytes as f64 / total_bytes as f64) * 100.0; // Thresholds for disk usage if usage_percent >= 95.0 { Status::Critical } else if usage_percent >= 85.0 { Status::Warning } else { Status::Ok } } /// Parse size string (e.g., "120G", "45M") to GB value fn parse_size_to_gb(&self, size_str: &str) -> f32 { let size_str = size_str.trim(); if size_str.is_empty() || size_str == "-" { return 0.0; } // Extract numeric part and unit let (num_str, unit) = if let Some(last_char) = size_str.chars().last() { if last_char.is_alphabetic() { let num_part = &size_str[..size_str.len()-1]; let unit_part = &size_str[size_str.len()-1..]; (num_part, unit_part) } else { (size_str, "") } } else { (size_str, "") }; let number: f32 = num_str.parse().unwrap_or(0.0); match unit.to_uppercase().as_str() { "T" | "TB" => number * 1024.0, "G" | "GB" => number, "M" | "MB" => number / 1024.0, "K" | "KB" => number / (1024.0 * 1024.0), "B" | "" => number / (1024.0 * 1024.0 * 1024.0), _ => number, // Assume GB if unknown unit } } } #[async_trait] impl Collector for DiskCollector { fn name(&self) -> &str { "disk" } async fn collect(&self) -> Result, CollectorError> { let start_time = Instant::now(); debug!("Collecting multi-disk metrics"); let mut metrics = Vec::new(); // Collect all mounted disks match self.get_mounted_disks() { Ok(mounted_disks) => { debug!("Found {} mounted disks", mounted_disks.len()); // Group disks by physical device to avoid duplicate SMART checks let mut physical_devices: std::collections::HashMap> = std::collections::HashMap::new(); for disk in &mounted_disks { physical_devices.entry(disk.physical_device.clone()) .or_insert_with(Vec::new) .push(disk); } // Generate metrics for each mounted disk for (disk_index, disk) in mounted_disks.iter().enumerate() { let timestamp = chrono::Utc::now().timestamp() as u64; // Parse size strings to get actual values for calculations let size_gb = self.parse_size_to_gb(&disk.size); let used_gb = self.parse_size_to_gb(&disk.used); let avail_gb = self.parse_size_to_gb(&disk.available); // Calculate status based on usage percentage let status = if disk.usage_percent >= 95.0 { Status::Critical } else if disk.usage_percent >= 85.0 { Status::Warning } else { Status::Ok }; // Device and mount point info metrics.push(Metric { name: format!("disk_{}_device", disk_index), value: MetricValue::String(disk.device.clone()), unit: None, description: Some(format!("Device: {}", disk.device)), status: Status::Ok, timestamp, }); metrics.push(Metric { name: format!("disk_{}_mount_point", disk_index), value: MetricValue::String(disk.mount_point.clone()), unit: None, description: Some(format!("Mount: {}", disk.mount_point)), status: Status::Ok, timestamp, }); metrics.push(Metric { name: format!("disk_{}_filesystem", disk_index), value: MetricValue::String(disk.filesystem.clone()), unit: None, description: Some(format!("FS: {}", disk.filesystem)), status: Status::Ok, timestamp, }); // Size metrics metrics.push(Metric { name: format!("disk_{}_total_gb", disk_index), value: MetricValue::Float(size_gb), unit: Some("GB".to_string()), description: Some(format!("Total: {}", disk.size)), status: Status::Ok, timestamp, }); metrics.push(Metric { name: format!("disk_{}_used_gb", disk_index), value: MetricValue::Float(used_gb), unit: Some("GB".to_string()), description: Some(format!("Used: {}", disk.used)), status, timestamp, }); metrics.push(Metric { name: format!("disk_{}_available_gb", disk_index), value: MetricValue::Float(avail_gb), unit: Some("GB".to_string()), description: Some(format!("Available: {}", disk.available)), status: Status::Ok, timestamp, }); metrics.push(Metric { name: format!("disk_{}_usage_percent", disk_index), value: MetricValue::Float(disk.usage_percent), unit: Some("%".to_string()), description: Some(format!("Usage: {:.1}%", disk.usage_percent)), status, timestamp, }); // Physical device name (for SMART health grouping) let physical_device_name = disk.physical_device .strip_prefix("/dev/") .unwrap_or(&disk.physical_device); metrics.push(Metric { name: format!("disk_{}_physical_device", disk_index), value: MetricValue::String(physical_device_name.to_string()), unit: None, description: Some(format!("Physical: {}", physical_device_name)), status: Status::Ok, timestamp, }); } // Add SMART health metrics for each unique physical device for (physical_device, disks) in physical_devices { let (health_status, temperature) = self.get_smart_health(&physical_device); let device_name = physical_device.strip_prefix("/dev/").unwrap_or(&physical_device); let timestamp = chrono::Utc::now().timestamp() as u64; let health_status_enum = match health_status.as_str() { "PASSED" => Status::Ok, "FAILED" => Status::Critical, _ => Status::Unknown, }; metrics.push(Metric { name: format!("disk_smart_{}_health", device_name), value: MetricValue::String(health_status.clone()), unit: None, description: Some(format!("SMART Health: {}", health_status)), status: health_status_enum, timestamp, }); if temperature > 0.0 { let temp_status = if temperature >= 70.0 { Status::Critical } else if temperature >= 60.0 { Status::Warning } else { Status::Ok }; metrics.push(Metric { name: format!("disk_smart_{}_temperature", device_name), value: MetricValue::Float(temperature), unit: Some("°C".to_string()), description: Some(format!("Temperature: {:.0}°C", temperature)), status: temp_status, timestamp, }); } } // Add disk count metric metrics.push(Metric { name: "disk_count".to_string(), value: MetricValue::Integer(mounted_disks.len() as i64), unit: None, description: Some(format!("Total mounted disks: {}", mounted_disks.len())), status: Status::Ok, timestamp: chrono::Utc::now().timestamp() as u64, }); } Err(e) => { debug!("Failed to get mounted disks: {}", e); metrics.push(Metric { name: "disk_count".to_string(), value: MetricValue::Integer(0), unit: None, description: Some(format!("Error: {}", e)), status: Status::Unknown, timestamp: chrono::Utc::now().timestamp() as u64, }); } } // Monitor /tmp directory size (keep existing functionality) match self.get_directory_size("/tmp") { Ok(tmp_size_bytes) => { let tmp_size_mb = tmp_size_bytes as f64 / (1024.0 * 1024.0); // Get /tmp filesystem info (usually tmpfs with 2GB limit) let (total_bytes, _) = match self.get_filesystem_info("/tmp") { Ok((total, used)) => (total, used), Err(_) => { // Fallback: assume 2GB limit for tmpfs (2 * 1024 * 1024 * 1024, tmp_size_bytes) } }; let total_mb = total_bytes as f64 / (1024.0 * 1024.0); let usage_percent = (tmp_size_bytes as f64 / total_bytes as f64) * 100.0; let status = self.calculate_usage_status(tmp_size_bytes, total_bytes); metrics.push(Metric { name: "disk_tmp_size_mb".to_string(), value: MetricValue::Float(tmp_size_mb as f32), unit: Some("MB".to_string()), description: Some(format!("Used: {:.1} MB", tmp_size_mb)), status, timestamp: chrono::Utc::now().timestamp() as u64, }); metrics.push(Metric { name: "disk_tmp_total_mb".to_string(), value: MetricValue::Float(total_mb as f32), unit: Some("MB".to_string()), description: Some(format!("Total: {:.1} MB", total_mb)), status: Status::Ok, timestamp: chrono::Utc::now().timestamp() as u64, }); metrics.push(Metric { name: "disk_tmp_usage_percent".to_string(), value: MetricValue::Float(usage_percent as f32), unit: Some("%".to_string()), description: Some(format!("Usage: {:.1}%", usage_percent)), status, timestamp: chrono::Utc::now().timestamp() as u64, }); } Err(e) => { debug!("Failed to get /tmp size: {}", e); metrics.push(Metric { name: "disk_tmp_size_mb".to_string(), value: MetricValue::String("error".to_string()), unit: Some("MB".to_string()), description: Some(format!("Error: {}", e)), status: Status::Unknown, timestamp: chrono::Utc::now().timestamp() as u64, }); } } let collection_time = start_time.elapsed(); debug!("Multi-disk collection completed in {:?} with {} metrics", collection_time, metrics.len()); Ok(metrics) } fn get_performance_metrics(&self) -> Option { None // Performance tracking handled by cache system } }