use anyhow::Result; use async_trait::async_trait; use cm_dashboard_shared::{Metric, MetricValue, Status, StatusTracker, HysteresisThresholds}; use crate::config::DiskConfig; use std::fs; use std::path::Path; use std::process::Command; use std::time::Instant; use tracing::debug; use super::{Collector, CollectorError, PerformanceMetrics}; /// Information about a mounted disk #[derive(Debug, Clone)] struct MountedDisk { device: String, // e.g., "/dev/nvme0n1p1" physical_device: String, // e.g., "/dev/nvme0n1" mount_point: String, // e.g., "/" filesystem: String, // e.g., "ext4" size: String, // e.g., "120G" used: String, // e.g., "45G" available: String, // e.g., "75G" usage_percent: f32, // e.g., 38.5 config_name: Option, // Name from config if UUID-based } /// Disk usage collector for monitoring filesystem sizes pub struct DiskCollector { config: DiskConfig, temperature_thresholds: HysteresisThresholds, } impl DiskCollector { pub fn new(config: DiskConfig) -> Self { // Create hysteresis thresholds for disk temperature let temperature_thresholds = HysteresisThresholds::with_custom_gaps( 60.0, // warning at 60°C 5.0, // 5°C gap for recovery 70.0, // critical at 70°C 5.0, // 5°C gap for recovery ); Self { config, temperature_thresholds, } } /// Calculate disk temperature status using hysteresis thresholds fn calculate_temperature_status(&self, metric_name: &str, temperature: f32, status_tracker: &mut StatusTracker) -> Status { status_tracker.calculate_with_hysteresis(metric_name, temperature, &self.temperature_thresholds) } /// Resolve UUID to actual device path fn resolve_uuid_to_device(&self, uuid: &str) -> Result { let uuid_path = format!("/dev/disk/by-uuid/{}", uuid); if Path::new(&uuid_path).exists() { match fs::read_link(&uuid_path) { Ok(target) => { // Convert relative path to absolute if target.is_relative() { let parent = Path::new(&uuid_path).parent().unwrap(); let resolved = parent.join(&target); match resolved.canonicalize() { Ok(canonical) => Ok(canonical.to_string_lossy().to_string()), Err(_) => Ok(target.to_string_lossy().to_string()), } } else { Ok(target.to_string_lossy().to_string()) } } Err(e) => Err(anyhow::anyhow!("Failed to resolve UUID {}: {}", uuid, e)), } } else { Err(anyhow::anyhow!("UUID {} not found in /dev/disk/by-uuid/", uuid)) } } /// Get configured filesystems from UUIDs fn get_configured_filesystems(&self) -> Result> { let mut configured_disks = Vec::new(); for fs_config in &self.config.filesystems { if !fs_config.monitor { continue; } // Resolve UUID to device match self.resolve_uuid_to_device(&fs_config.uuid) { Ok(device_path) => { // Get filesystem stats for the mount point match self.get_filesystem_info(&fs_config.mount_point) { Ok((total_bytes, used_bytes)) => { let available_bytes = total_bytes - used_bytes; let usage_percent = if total_bytes > 0 { (used_bytes as f64 / total_bytes as f64) * 100.0 } else { 0.0 }; // Convert bytes to human-readable format let size = self.bytes_to_human_readable(total_bytes); let used = self.bytes_to_human_readable(used_bytes); let available = self.bytes_to_human_readable(available_bytes); // Get physical device for SMART monitoring let physical_device = self.get_physical_device(&device_path)?; configured_disks.push(MountedDisk { device: device_path.clone(), physical_device, mount_point: fs_config.mount_point.clone(), filesystem: fs_config.fs_type.clone(), size, used, available, usage_percent: usage_percent as f32, config_name: Some(fs_config.name.clone()), }); debug!( "Configured filesystem '{}' (UUID: {}) mounted at {} using {}", fs_config.name, fs_config.uuid, fs_config.mount_point, device_path ); } Err(e) => { debug!( "Failed to get filesystem info for configured filesystem '{}': {}", fs_config.name, e ); } } } Err(e) => { debug!( "Failed to resolve UUID for configured filesystem '{}': {}", fs_config.name, e ); } } } Ok(configured_disks) } /// Convert bytes to human-readable format fn bytes_to_human_readable(&self, bytes: u64) -> String { const UNITS: &[&str] = &["B", "K", "M", "G", "T"]; let mut size = bytes as f64; let mut unit_index = 0; while size >= 1024.0 && unit_index < UNITS.len() - 1 { size /= 1024.0; unit_index += 1; } if unit_index == 0 { format!("{:.0}{}", size, UNITS[unit_index]) } else { format!("{:.1}{}", size, UNITS[unit_index]) } } /// Get directory size using du command (efficient for single directory) fn get_directory_size(&self, path: &str) -> Result { let output = Command::new("du") .arg("-s") .arg("--block-size=1") .arg(path) .output()?; // du returns success even with permission denied warnings in stderr // We only care if the command completely failed or produced no stdout let output_str = String::from_utf8(output.stdout)?; if output_str.trim().is_empty() { return Err(anyhow::anyhow!( "du command produced no output for {}", path )); } let size_str = output_str .split_whitespace() .next() .ok_or_else(|| anyhow::anyhow!("Failed to parse du output"))?; let size_bytes = size_str.parse::()?; Ok(size_bytes) } /// Get filesystem info using df command fn get_filesystem_info(&self, path: &str) -> Result<(u64, u64)> { let output = Command::new("df") .arg("--block-size=1") .arg(path) .output()?; if !output.status.success() { return Err(anyhow::anyhow!("df command failed for {}", path)); } let output_str = String::from_utf8(output.stdout)?; let lines: Vec<&str> = output_str.lines().collect(); if lines.len() < 2 { return Err(anyhow::anyhow!("Unexpected df output format")); } let fields: Vec<&str> = lines[1].split_whitespace().collect(); if fields.len() < 4 { return Err(anyhow::anyhow!("Unexpected df fields count")); } let total_bytes = fields[1].parse::()?; let used_bytes = fields[2].parse::()?; Ok((total_bytes, used_bytes)) } /// Get the physical device for a given device (resolves symlinks, gets parent device) fn get_physical_device(&self, device: &str) -> Result { // For NVMe: /dev/nvme0n1p1 -> /dev/nvme0n1 if device.contains("nvme") && device.contains("p") { if let Some(base) = device.split('p').next() { return Ok(base.to_string()); } } // For SATA: /dev/sda1 -> /dev/sda if device.starts_with("/dev/sd") && device.len() > 8 { return Ok(device[..8].to_string()); // Keep /dev/sdX } // For VirtIO: /dev/vda1 -> /dev/vda if device.starts_with("/dev/vd") && device.len() > 8 { return Ok(device[..8].to_string()); } // If no partition detected, return as-is Ok(device.to_string()) } /// Get SMART health for a specific physical device fn get_smart_health(&self, device: &str) -> (String, f32) { if let Ok(output) = Command::new("sudo") .arg("smartctl") .arg("-H") .arg(device) .output() { if output.status.success() { let output_str = String::from_utf8_lossy(&output.stdout); let health_status = if output_str.contains("PASSED") { "PASSED" } else if output_str.contains("FAILED") { "FAILED" } else { "UNKNOWN" }; // Try to get temperature let temperature = if let Ok(temp_output) = Command::new("sudo") .arg("smartctl") .arg("-A") .arg(device) .output() { let temp_str = String::from_utf8_lossy(&temp_output.stdout); // Look for temperature in SMART attributes for line in temp_str.lines() { if line.contains("Temperature") && line.contains("Celsius") { if let Some(temp_part) = line.split_whitespace().nth(9) { if let Ok(temp) = temp_part.parse::() { return (health_status.to_string(), temp); } } } } 0.0 } else { 0.0 }; return (health_status.to_string(), temperature); } } ("UNKNOWN".to_string(), 0.0) } /// Calculate status based on usage percentage fn calculate_usage_status(&self, used_bytes: u64, total_bytes: u64) -> Status { if total_bytes == 0 { return Status::Unknown; } let usage_percent = (used_bytes as f64 / total_bytes as f64) * 100.0; // Thresholds for disk usage if usage_percent >= 95.0 { Status::Critical } else if usage_percent >= 85.0 { Status::Warning } else { Status::Ok } } /// Parse size string (e.g., "120G", "45M") to GB value fn parse_size_to_gb(&self, size_str: &str) -> f32 { let size_str = size_str.trim(); if size_str.is_empty() || size_str == "-" { return 0.0; } // Extract numeric part and unit let (num_str, unit) = if let Some(last_char) = size_str.chars().last() { if last_char.is_alphabetic() { let num_part = &size_str[..size_str.len() - 1]; let unit_part = &size_str[size_str.len() - 1..]; (num_part, unit_part) } else { (size_str, "") } } else { (size_str, "") }; let number: f32 = num_str.parse().unwrap_or(0.0); match unit.to_uppercase().as_str() { "T" | "TB" => number * 1024.0, "G" | "GB" => number, "M" | "MB" => number / 1024.0, "K" | "KB" => number / (1024.0 * 1024.0), "B" | "" => number / (1024.0 * 1024.0 * 1024.0), _ => number, // Assume GB if unknown unit } } } #[async_trait] impl Collector for DiskCollector { fn name(&self) -> &str { "disk" } async fn collect(&self, status_tracker: &mut StatusTracker) -> Result, CollectorError> { let start_time = Instant::now(); debug!("Collecting multi-disk metrics"); let mut metrics = Vec::new(); // Use UUID-based configured filesystems let mounted_disks = match self.get_configured_filesystems() { Ok(configured) => { debug!("Using UUID-based filesystems: {} found", configured.len()); configured } Err(e) => { debug!("Failed to get configured filesystems: {}", e); Vec::new() } }; // Process discovered/configured disks if !mounted_disks.is_empty() { debug!("Found {} mounted disks", mounted_disks.len()); // Group disks by physical device to avoid duplicate SMART checks let mut physical_devices: std::collections::HashMap> = std::collections::HashMap::new(); for disk in &mounted_disks { physical_devices .entry(disk.physical_device.clone()) .or_insert_with(Vec::new) .push(disk); } // Generate metrics for each mounted disk for (disk_index, disk) in mounted_disks.iter().enumerate() { let timestamp = chrono::Utc::now().timestamp() as u64; // Always use index for metric names to maintain dashboard compatibility let disk_name = disk_index.to_string(); // Parse size strings to get actual values for calculations let size_gb = self.parse_size_to_gb(&disk.size); let used_gb = self.parse_size_to_gb(&disk.used); let avail_gb = self.parse_size_to_gb(&disk.available); // Calculate status based on configured thresholds let status = if disk.usage_percent >= self.config.usage_critical_percent { Status::Critical } else if disk.usage_percent >= self.config.usage_warning_percent { Status::Warning } else { Status::Ok }; // Device and mount point info metrics.push(Metric { name: format!("disk_{}_device", disk_name), value: MetricValue::String(disk.device.clone()), unit: None, description: Some(format!("Device: {}", disk.device)), status: Status::Ok, timestamp, }); metrics.push(Metric { name: format!("disk_{}_mount_point", disk_name), value: MetricValue::String(disk.mount_point.clone()), unit: None, description: Some(format!("Mount: {}", disk.mount_point)), status: Status::Ok, timestamp, }); metrics.push(Metric { name: format!("disk_{}_filesystem", disk_name), value: MetricValue::String(disk.filesystem.clone()), unit: None, description: Some(format!("FS: {}", disk.filesystem)), status: Status::Ok, timestamp, }); // Size metrics metrics.push(Metric { name: format!("disk_{}_total_gb", disk_name), value: MetricValue::Float(size_gb), unit: Some("GB".to_string()), description: Some(format!("Total: {}", disk.size)), status: Status::Ok, timestamp, }); metrics.push(Metric { name: format!("disk_{}_used_gb", disk_name), value: MetricValue::Float(used_gb), unit: Some("GB".to_string()), description: Some(format!("Used: {}", disk.used)), status, timestamp, }); metrics.push(Metric { name: format!("disk_{}_available_gb", disk_name), value: MetricValue::Float(avail_gb), unit: Some("GB".to_string()), description: Some(format!("Available: {}", disk.available)), status: Status::Ok, timestamp, }); metrics.push(Metric { name: format!("disk_{}_usage_percent", disk_name), value: MetricValue::Float(disk.usage_percent), unit: Some("%".to_string()), description: Some(format!("Usage: {:.1}%", disk.usage_percent)), status, timestamp, }); // Physical device name (for SMART health grouping) let physical_device_name = disk .physical_device .strip_prefix("/dev/") .unwrap_or(&disk.physical_device); metrics.push(Metric { name: format!("disk_{}_physical_device", disk_name), value: MetricValue::String(physical_device_name.to_string()), unit: None, description: Some(format!("Physical: {}", physical_device_name)), status: Status::Ok, timestamp, }); } // Add SMART health metrics for each unique physical device for (physical_device, _disks) in physical_devices { let (health_status, temperature) = self.get_smart_health(&physical_device); let device_name = physical_device .strip_prefix("/dev/") .unwrap_or(&physical_device); let timestamp = chrono::Utc::now().timestamp() as u64; let health_status_enum = match health_status.as_str() { "PASSED" => Status::Ok, "FAILED" => Status::Critical, _ => Status::Unknown, }; metrics.push(Metric { name: format!("disk_smart_{}_health", device_name), value: MetricValue::String(health_status.clone()), unit: None, description: Some(format!("SMART Health: {}", health_status)), status: health_status_enum, timestamp, }); if temperature > 0.0 { let metric_name = format!("disk_smart_{}_temperature", device_name); let temp_status = self.calculate_temperature_status(&metric_name, temperature, status_tracker); metrics.push(Metric { name: format!("disk_smart_{}_temperature", device_name), value: MetricValue::Float(temperature), unit: Some("°C".to_string()), description: Some(format!("Temperature: {:.0}°C", temperature)), status: temp_status, timestamp, }); } } // Add disk count metric metrics.push(Metric { name: "disk_count".to_string(), value: MetricValue::Integer(mounted_disks.len() as i64), unit: None, description: Some(format!("Total mounted disks: {}", mounted_disks.len())), status: Status::Ok, timestamp: chrono::Utc::now().timestamp() as u64, }); } else { // No disks configured - add zero count metric metrics.push(Metric { name: "disk_count".to_string(), value: MetricValue::Integer(0), unit: None, description: Some("No disks configured for monitoring".to_string()), status: Status::Warning, timestamp: chrono::Utc::now().timestamp() as u64, }); } // Monitor /tmp directory size (keep existing functionality) match self.get_directory_size("/tmp") { Ok(tmp_size_bytes) => { let tmp_size_mb = tmp_size_bytes as f64 / (1024.0 * 1024.0); // Get /tmp filesystem info (usually tmpfs with 2GB limit) let (total_bytes, _) = match self.get_filesystem_info("/tmp") { Ok((total, used)) => (total, used), Err(_) => { // Fallback: assume 2GB limit for tmpfs (2 * 1024 * 1024 * 1024, tmp_size_bytes) } }; let total_mb = total_bytes as f64 / (1024.0 * 1024.0); let usage_percent = (tmp_size_bytes as f64 / total_bytes as f64) * 100.0; let status = self.calculate_usage_status(tmp_size_bytes, total_bytes); metrics.push(Metric { name: "disk_tmp_size_mb".to_string(), value: MetricValue::Float(tmp_size_mb as f32), unit: Some("MB".to_string()), description: Some(format!("Used: {:.1} MB", tmp_size_mb)), status, timestamp: chrono::Utc::now().timestamp() as u64, }); metrics.push(Metric { name: "disk_tmp_total_mb".to_string(), value: MetricValue::Float(total_mb as f32), unit: Some("MB".to_string()), description: Some(format!("Total: {:.1} MB", total_mb)), status: Status::Ok, timestamp: chrono::Utc::now().timestamp() as u64, }); metrics.push(Metric { name: "disk_tmp_usage_percent".to_string(), value: MetricValue::Float(usage_percent as f32), unit: Some("%".to_string()), description: Some(format!("Usage: {:.1}%", usage_percent)), status, timestamp: chrono::Utc::now().timestamp() as u64, }); } Err(e) => { debug!("Failed to get /tmp size: {}", e); metrics.push(Metric { name: "disk_tmp_size_mb".to_string(), value: MetricValue::String("error".to_string()), unit: Some("MB".to_string()), description: Some(format!("Error: {}", e)), status: Status::Unknown, timestamp: chrono::Utc::now().timestamp() as u64, }); } } let collection_time = start_time.elapsed(); debug!( "Multi-disk collection completed in {:?} with {} metrics", collection_time, metrics.len() ); Ok(metrics) } fn get_performance_metrics(&self) -> Option { None // Performance tracking handled by cache system } }