use async_trait::async_trait; use chrono::Utc; use serde::{Deserialize, Serialize}; use serde_json::json; use std::io::ErrorKind; use std::process::Stdio; use std::time::Duration; use tokio::process::Command; use tokio::time::timeout; use super::{AgentType, Collector, CollectorError, CollectorOutput}; #[derive(Debug, Clone)] pub struct SmartCollector { pub enabled: bool, pub interval: Duration, pub devices: Vec, pub timeout_ms: u64, } impl SmartCollector { pub fn new(enabled: bool, interval_ms: u64, devices: Vec) -> Self { Self { enabled, interval: Duration::from_millis(interval_ms), devices, timeout_ms: 30000, // 30 second timeout for smartctl } } async fn get_smart_data(&self, device: &str) -> Result { let timeout_duration = Duration::from_millis(self.timeout_ms); let command_result = timeout( timeout_duration, Command::new("smartctl") .args(["-a", "-j", &format!("/dev/{}", device)]) .stdout(Stdio::piped()) .stderr(Stdio::piped()) .output(), ) .await .map_err(|_| CollectorError::Timeout { duration_ms: self.timeout_ms, })?; let output = command_result.map_err(|e| match e.kind() { ErrorKind::NotFound => CollectorError::ExternalDependency { dependency: "smartctl".to_string(), message: e.to_string(), }, ErrorKind::PermissionDenied => CollectorError::PermissionDenied { message: e.to_string(), }, _ => CollectorError::CommandFailed { command: format!("smartctl -a -j /dev/{}", device), message: e.to_string(), }, })?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); let stderr_lower = stderr.to_lowercase(); if stderr_lower.contains("permission denied") { return Err(CollectorError::PermissionDenied { message: stderr.to_string(), }); } if stderr_lower.contains("no such device") || stderr_lower.contains("cannot open") { return Err(CollectorError::DeviceNotFound { device: device.to_string(), }); } return Err(CollectorError::CommandFailed { command: format!("smartctl -a -j /dev/{}", device), message: stderr.to_string(), }); } let stdout = String::from_utf8_lossy(&output.stdout); let smart_output: SmartCtlOutput = serde_json::from_str(&stdout).map_err(|e| CollectorError::ParseError { message: format!("Failed to parse smartctl output for {}: {}", device, e), })?; Ok(SmartDeviceData::from_smartctl_output(device, smart_output)) } async fn get_drive_usage( &self, device: &str, ) -> Result<(Option, Option), CollectorError> { // Get capacity first let capacity = match self.get_drive_capacity(device).await { Ok(cap) => Some(cap), Err(_) => None, }; // Try to get usage information // For simplicity, we'll use the root filesystem usage for now // In the future, this could be enhanced to map drives to specific mount points let usage = if device.contains("nvme0n1") || device.contains("sda") { // This is likely the main system drive, use root filesystem usage match self.get_disk_usage().await { Ok(disk_usage) => Some(disk_usage.used_gb), Err(_) => None, } } else { // For other drives, we don't have usage info yet None }; Ok((capacity, usage)) } async fn get_drive_capacity(&self, device: &str) -> Result { let output = Command::new("lsblk") .args(["-J", "-o", "NAME,SIZE", &format!("/dev/{}", device)]) .stdout(Stdio::piped()) .stderr(Stdio::piped()) .output() .await .map_err(|e| CollectorError::CommandFailed { command: format!("lsblk -J -o NAME,SIZE /dev/{}", device), message: e.to_string(), })?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); return Err(CollectorError::CommandFailed { command: format!("lsblk -J -o NAME,SIZE /dev/{}", device), message: stderr.to_string(), }); } let stdout = String::from_utf8_lossy(&output.stdout); let lsblk_output: serde_json::Value = serde_json::from_str(&stdout).map_err(|e| CollectorError::ParseError { message: format!("Failed to parse lsblk JSON: {}", e), })?; // Extract size from the first blockdevice if let Some(blockdevices) = lsblk_output["blockdevices"].as_array() { if let Some(device_info) = blockdevices.first() { if let Some(size_str) = device_info["size"].as_str() { return self.parse_lsblk_size(size_str); } } } Err(CollectorError::ParseError { message: format!("No size information found for device {}", device), }) } fn parse_lsblk_size(&self, size_str: &str) -> Result { // Parse sizes like "953,9G", "1T", "512M" let size_str = size_str.replace(',', "."); // Handle European decimal separator if let Some(pos) = size_str.find(|c: char| c.is_alphabetic()) { let (number_part, unit_part) = size_str.split_at(pos); let number: f32 = number_part .parse() .map_err(|e| CollectorError::ParseError { message: format!("Failed to parse size number '{}': {}", number_part, e), })?; let multiplier = match unit_part.to_uppercase().as_str() { "T" | "TB" => 1024.0, "G" | "GB" => 1.0, "M" | "MB" => 1.0 / 1024.0, "K" | "KB" => 1.0 / (1024.0 * 1024.0), _ => { return Err(CollectorError::ParseError { message: format!("Unknown size unit: {}", unit_part), }) } }; Ok(number * multiplier) } else { Err(CollectorError::ParseError { message: format!("Invalid size format: {}", size_str), }) } } async fn get_disk_usage(&self) -> Result { let output = Command::new("df") .args(["-BG", "--output=size,used,avail", "/"]) .stdout(Stdio::piped()) .stderr(Stdio::piped()) .output() .await .map_err(|e| CollectorError::CommandFailed { command: "df -BG --output=size,used,avail /".to_string(), message: e.to_string(), })?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); return Err(CollectorError::CommandFailed { command: "df -BG --output=size,used,avail /".to_string(), message: stderr.to_string(), }); } let stdout = String::from_utf8_lossy(&output.stdout); let lines: Vec<&str> = stdout.lines().collect(); if lines.len() < 2 { return Err(CollectorError::ParseError { message: "Unexpected df output format".to_string(), }); } // Skip header line, parse data line let data_line = lines[1].trim(); let parts: Vec<&str> = data_line.split_whitespace().collect(); if parts.len() < 3 { return Err(CollectorError::ParseError { message: format!("Unexpected df data format: {}", data_line), }); } let parse_size = |s: &str| -> Result { s.trim_end_matches('G') .parse::() .map_err(|e| CollectorError::ParseError { message: format!("Failed to parse disk size '{}': {}", s, e), }) }; Ok(DiskUsage { total_gb: parse_size(parts[0])?, used_gb: parse_size(parts[1])?, available_gb: parse_size(parts[2])?, }) } } #[async_trait] impl Collector for SmartCollector { fn name(&self) -> &str { "smart" } fn agent_type(&self) -> AgentType { AgentType::Smart } fn collect_interval(&self) -> Duration { self.interval } fn is_enabled(&self) -> bool { self.enabled } fn requires_root(&self) -> bool { true // smartctl typically requires root access } async fn collect(&self) -> Result { let mut drives = Vec::new(); let mut issues = Vec::new(); let mut healthy = 0; let mut warning = 0; let mut critical = 0; // Collect data from all configured devices for device in &self.devices { match self.get_smart_data(device).await { Ok(mut drive_data) => { // Try to get capacity and usage for this drive if let Ok((capacity, usage)) = self.get_drive_usage(device).await { drive_data.capacity_gb = capacity; drive_data.used_gb = usage; } match drive_data.health_status.as_str() { "PASSED" => healthy += 1, "FAILED" => { critical += 1; issues.push(format!("{}: SMART status FAILED", device)); } _ => { warning += 1; issues.push(format!("{}: Unknown SMART status", device)); } } drives.push(drive_data); } Err(e) => { warning += 1; issues.push(format!("{}: {}", device, e)); } } } // Get disk usage information let disk_usage = self.get_disk_usage().await?; let status = if critical > 0 { "CRITICAL" } else if warning > 0 { "WARNING" } else { "HEALTHY" }; let smart_metrics = json!({ "status": status, "drives": drives, "summary": { "healthy": healthy, "warning": warning, "critical": critical, "capacity_total_gb": disk_usage.total_gb, "capacity_used_gb": disk_usage.used_gb, "capacity_available_gb": disk_usage.available_gb }, "issues": issues, "timestamp": Utc::now() }); Ok(CollectorOutput { agent_type: AgentType::Smart, data: smart_metrics, timestamp: Utc::now(), }) } } #[derive(Debug, Clone, Serialize)] struct SmartDeviceData { name: String, temperature_c: f32, wear_level: f32, power_on_hours: u64, available_spare: f32, health_status: String, capacity_gb: Option, used_gb: Option, } impl SmartDeviceData { fn from_smartctl_output(device: &str, output: SmartCtlOutput) -> Self { let temperature_c = output.temperature.and_then(|t| t.current).unwrap_or(0.0); let wear_level = output .nvme_smart_health_information_log .as_ref() .and_then(|nvme| nvme.percentage_used) .unwrap_or(0.0); let power_on_hours = output.power_on_time.and_then(|p| p.hours).unwrap_or(0); let available_spare = output .nvme_smart_health_information_log .as_ref() .and_then(|nvme| nvme.available_spare) .unwrap_or(100.0); let health_status = output .smart_status .and_then(|s| s.passed) .map(|passed| { if passed { "PASSED".to_string() } else { "FAILED".to_string() } }) .unwrap_or_else(|| "UNKNOWN".to_string()); Self { name: device.to_string(), temperature_c, wear_level, power_on_hours, available_spare, health_status, capacity_gb: None, // Will be set later by the collector used_gb: None, // Will be set later by the collector } } } #[derive(Debug, Clone)] struct DiskUsage { total_gb: f32, used_gb: f32, available_gb: f32, } // Minimal smartctl JSON output structure - only the fields we need #[derive(Debug, Deserialize)] struct SmartCtlOutput { temperature: Option, power_on_time: Option, smart_status: Option, nvme_smart_health_information_log: Option, } #[derive(Debug, Deserialize)] struct Temperature { current: Option, } #[derive(Debug, Deserialize)] struct PowerOnTime { hours: Option, } #[derive(Debug, Deserialize)] struct SmartStatus { passed: Option, } #[derive(Debug, Deserialize)] struct NvmeSmartLog { percentage_used: Option, available_spare: Option, } #[cfg(test)] mod tests { use super::*; #[test] fn test_parse_lsblk_size() { let collector = SmartCollector::new(true, 5000, vec![]); // Test gigabyte sizes assert!((collector.parse_lsblk_size("953,9G").unwrap() - 953.9).abs() < 0.1); assert!((collector.parse_lsblk_size("1G").unwrap() - 1.0).abs() < 0.1); // Test terabyte sizes assert!((collector.parse_lsblk_size("1T").unwrap() - 1024.0).abs() < 0.1); assert!((collector.parse_lsblk_size("2,5T").unwrap() - 2560.0).abs() < 0.1); // Test megabyte sizes assert!((collector.parse_lsblk_size("512M").unwrap() - 0.5).abs() < 0.1); // Test error cases assert!(collector.parse_lsblk_size("invalid").is_err()); assert!(collector.parse_lsblk_size("1X").is_err()); } }