2025-10-12 14:53:27 +02:00

454 lines
14 KiB
Rust

use async_trait::async_trait;
use chrono::Utc;
use serde::{Deserialize, Serialize};
use serde_json::json;
use std::io::ErrorKind;
use std::process::Stdio;
use std::time::Duration;
use tokio::process::Command;
use tokio::time::timeout;
use super::{AgentType, Collector, CollectorError, CollectorOutput};
/// Collector that gathers SMART health data for a configured list of block devices.
#[derive(Debug, Clone)]
pub struct SmartCollector {
/// Whether this collector is enabled; reported through `is_enabled`.
pub enabled: bool,
/// Interval between collections; reported through `collect_interval`.
pub interval: Duration,
/// Device names without the `/dev/` prefix (the prefix is added when commands are built).
pub devices: Vec<String>,
/// Timeout, in milliseconds, applied to each `smartctl` invocation.
pub timeout_ms: u64,
}
impl SmartCollector {
pub fn new(enabled: bool, interval_ms: u64, devices: Vec<String>) -> Self {
Self {
enabled,
interval: Duration::from_millis(interval_ms),
devices,
timeout_ms: 30000, // 30 second timeout for smartctl
}
}
/// Runs `smartctl -a -j /dev/<device>` and converts its JSON report into
/// [`SmartDeviceData`].
///
/// # Errors
///
/// * `CollectorError::Timeout` if smartctl does not finish within `timeout_ms`.
/// * `CollectorError::ExternalDependency` if the `smartctl` binary is not found.
/// * `CollectorError::PermissionDenied` if spawning or opening the device is denied.
/// * `CollectorError::DeviceNotFound` if smartctl reports that the device cannot be opened.
/// * `CollectorError::CommandFailed` for any other spawn failure or fatal exit status.
/// * `CollectorError::ParseError` if the report is not valid JSON for `SmartCtlOutput`.
async fn get_smart_data(&self, device: &str) -> Result<SmartDeviceData, CollectorError> {
    // smartctl's exit status is a bit mask, not a plain success/failure flag.
    // Only bit 0 (command line did not parse) and bit 1 (device open failed)
    // mean that no usable report was produced. The higher bits describe the
    // drive's health (e.g. bit 3: SMART status "DISK FAILING") and accompany a
    // valid report, so treating them as command failures would hide exactly
    // the drives that most need to be reported.
    const EXIT_CMDLINE_ERROR: i32 = 1 << 0;
    const EXIT_DEVICE_OPEN_FAILED: i32 = 1 << 1;
    const FATAL_EXIT_BITS: i32 = EXIT_CMDLINE_ERROR | EXIT_DEVICE_OPEN_FAILED;

    let device_path = format!("/dev/{}", device);
    let command_line = format!("smartctl -a -j {}", device_path);

    let command_result = timeout(
        Duration::from_millis(self.timeout_ms),
        Command::new("smartctl")
            .args(["-a", "-j", device_path.as_str()])
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            // Without this the child keeps running after a timeout drops the future.
            .kill_on_drop(true)
            .output(),
    )
    .await
    .map_err(|_| CollectorError::Timeout {
        duration_ms: self.timeout_ms,
    })?;

    let output = command_result.map_err(|e| match e.kind() {
        ErrorKind::NotFound => CollectorError::ExternalDependency {
            dependency: "smartctl".to_string(),
            message: e.to_string(),
        },
        ErrorKind::PermissionDenied => CollectorError::PermissionDenied {
            message: e.to_string(),
        },
        _ => CollectorError::CommandFailed {
            command: command_line.clone(),
            message: e.to_string(),
        },
    })?;

    let stdout = String::from_utf8_lossy(&output.stdout);

    // `code()` is `None` when the process was terminated by a signal; no
    // complete report can be expected in that case either.
    let fatal = output
        .status
        .code()
        .map_or(true, |code| (code & FATAL_EXIT_BITS) != 0);

    if fatal {
        let stderr = String::from_utf8_lossy(&output.stderr);
        // NOTE(review): in JSON mode smartctl appears to report diagnostics inside
        // the JSON document on stdout rather than on stderr, so both streams are
        // searched when classifying the failure.
        let diagnostics = format!("{}\n{}", stderr, stdout).to_lowercase();
        let message = if stderr.trim().is_empty() {
            stdout.trim().to_string()
        } else {
            stderr.to_string()
        };

        if diagnostics.contains("permission denied") {
            return Err(CollectorError::PermissionDenied { message });
        }
        if diagnostics.contains("no such device") || diagnostics.contains("cannot open") {
            return Err(CollectorError::DeviceNotFound {
                device: device.to_string(),
            });
        }
        return Err(CollectorError::CommandFailed {
            command: command_line,
            message,
        });
    }

    let smart_output: SmartCtlOutput =
        serde_json::from_str(&stdout).map_err(|e| CollectorError::ParseError {
            message: format!("Failed to parse smartctl output for {}: {}", device, e),
        })?;

    Ok(SmartDeviceData::from_smartctl_output(device, smart_output))
}
/// Returns `(capacity_gb, used_gb)` for `device`, each `None` when unavailable.
///
/// Capacity comes from `get_drive_capacity`. Usage is only reported for devices
/// whose name contains `nvme0n1` or `sda`, which are assumed to hold the root
/// filesystem; in that case the root filesystem's used space is returned.
/// Failures of either lookup are mapped to `None`, so this never returns `Err`.
async fn get_drive_usage(
    &self,
    device: &str,
) -> Result<(Option<f32>, Option<f32>), CollectorError> {
    // Capacity is looked up first; a failure simply leaves it unknown.
    let capacity = self.get_drive_capacity(device).await.ok();

    // For simplicity the root filesystem usage stands in for the drive usage.
    // Mapping drives to their actual mount points is left as a future enhancement.
    let likely_system_drive = ["nvme0n1", "sda"]
        .iter()
        .any(|needle| device.contains(*needle));

    let usage = if likely_system_drive {
        self.get_disk_usage()
            .await
            .ok()
            .map(|root_fs| root_fs.used_gb)
    } else {
        // No usage information is available for other drives yet.
        None
    };

    Ok((capacity, usage))
}
/// Queries `lsblk -J -o NAME,SIZE /dev/<device>` and returns the size of the
/// first listed block device, converted by `parse_lsblk_size`.
///
/// # Errors
///
/// * `CollectorError::CommandFailed` if lsblk cannot be spawned or exits unsuccessfully.
/// * `CollectorError::ParseError` if the output is not JSON, contains no size
///   for the device, or the size string cannot be parsed.
async fn get_drive_capacity(&self, device: &str) -> Result<f32, CollectorError> {
    let device_path = format!("/dev/{}", device);
    let command_line = format!("lsblk -J -o NAME,SIZE {}", device_path);

    let spawned = Command::new("lsblk")
        .args(["-J", "-o", "NAME,SIZE", device_path.as_str()])
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output()
        .await;

    let output = match spawned {
        Ok(output) => output,
        Err(e) => {
            return Err(CollectorError::CommandFailed {
                command: command_line,
                message: e.to_string(),
            })
        }
    };

    if !output.status.success() {
        return Err(CollectorError::CommandFailed {
            command: command_line,
            message: String::from_utf8_lossy(&output.stderr).to_string(),
        });
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let report = match serde_json::from_str::<serde_json::Value>(&stdout) {
        Ok(value) => value,
        Err(e) => {
            return Err(CollectorError::ParseError {
                message: format!("Failed to parse lsblk JSON: {}", e),
            })
        }
    };

    // The size of interest belongs to the first entry of `blockdevices`.
    let size = report["blockdevices"]
        .as_array()
        .and_then(|entries| entries.first())
        .and_then(|entry| entry["size"].as_str());

    match size {
        Some(size_str) => self.parse_lsblk_size(size_str),
        None => Err(CollectorError::ParseError {
            message: format!("No size information found for device {}", device),
        }),
    }
}
/// Parses a human-readable lsblk size such as `"953,9G"`, `"1T"`, `"512M"` or
/// `"0B"` into gigabytes, using 1024-based unit steps.
///
/// A comma is accepted as the decimal separator, surrounding whitespace is
/// ignored and units are matched case-insensitively. Supported units are
/// `B`, `K`, `M`, `G`, `T`, `P` and `E` (the multi-byte units also with a
/// trailing `B`, e.g. `GB`).
///
/// # Errors
///
/// Returns `CollectorError::ParseError` when the string has no unit, the
/// numeric part is not a valid number, or the unit is not recognised.
fn parse_lsblk_size(&self, size_str: &str) -> Result<f32, CollectorError> {
    const STEP: f32 = 1024.0;

    // Handle the European decimal separator and stray padding.
    let size_str = size_str.trim().replace(',', ".");

    let pos = match size_str.find(|c: char| c.is_alphabetic()) {
        Some(pos) => pos,
        None => {
            return Err(CollectorError::ParseError {
                message: format!("Invalid size format: {}", size_str),
            })
        }
    };

    let (number_part, unit_part) = size_str.split_at(pos);
    let number_part = number_part.trim();
    let unit_part = unit_part.trim();

    let number: f32 = number_part
        .parse()
        .map_err(|e| CollectorError::ParseError {
            message: format!("Failed to parse size number '{}': {}", number_part, e),
        })?;

    // Multiplier that converts the given unit into gigabytes.
    let multiplier = match unit_part.to_uppercase().as_str() {
        "E" | "EB" => STEP * STEP * STEP,
        "P" | "PB" => STEP * STEP,
        "T" | "TB" => STEP,
        "G" | "GB" => 1.0,
        "M" | "MB" => 1.0 / STEP,
        "K" | "KB" => 1.0 / (STEP * STEP),
        // lsblk prints plain bytes for tiny or empty devices, e.g. "0B".
        "B" => 1.0 / (STEP * STEP * STEP),
        _ => {
            return Err(CollectorError::ParseError {
                message: format!("Unknown size unit: {}", unit_part),
            })
        }
    };

    Ok(number * multiplier)
}
/// Reads total, used and available space of the root filesystem by running
/// `df -BG --output=size,used,avail /` and parsing its second output line.
///
/// # Errors
///
/// * `CollectorError::CommandFailed` if df cannot be spawned or exits unsuccessfully.
/// * `CollectorError::ParseError` if the output has fewer than two lines, the
///   data line has fewer than three columns, or a column is not a number.
async fn get_disk_usage(&self) -> Result<DiskUsage, CollectorError> {
    const DF_COMMAND: &str = "df -BG --output=size,used,avail /";

    /// Parses one df column such as `"120G"` into a number of gigabytes.
    fn gigabytes(field: &str) -> Result<f32, CollectorError> {
        match field.trim_end_matches('G').parse::<f32>() {
            Ok(value) => Ok(value),
            Err(e) => Err(CollectorError::ParseError {
                message: format!("Failed to parse disk size '{}': {}", field, e),
            }),
        }
    }

    let spawned = Command::new("df")
        .args(["-BG", "--output=size,used,avail", "/"])
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output()
        .await;

    let output = match spawned {
        Ok(output) => output,
        Err(e) => {
            return Err(CollectorError::CommandFailed {
                command: DF_COMMAND.to_string(),
                message: e.to_string(),
            })
        }
    };

    if !output.status.success() {
        return Err(CollectorError::CommandFailed {
            command: DF_COMMAND.to_string(),
            message: String::from_utf8_lossy(&output.stderr).to_string(),
        });
    }

    let stdout = String::from_utf8_lossy(&output.stdout);

    // The first line is the header; the values live on the second line.
    let data_line = match stdout.lines().nth(1) {
        Some(line) => line.trim(),
        None => {
            return Err(CollectorError::ParseError {
                message: "Unexpected df output format".to_string(),
            })
        }
    };

    let mut columns = data_line.split_whitespace();
    let (total, used, available) = match (columns.next(), columns.next(), columns.next()) {
        (Some(total), Some(used), Some(available)) => (total, used, available),
        _ => {
            return Err(CollectorError::ParseError {
                message: format!("Unexpected df data format: {}", data_line),
            })
        }
    };

    let total_gb = gigabytes(total)?;
    let used_gb = gigabytes(used)?;
    let available_gb = gigabytes(available)?;

    Ok(DiskUsage {
        total_gb,
        used_gb,
        available_gb,
    })
}
}
#[async_trait]
impl Collector for SmartCollector {
    /// Name of this collector.
    fn name(&self) -> &str {
        "smart"
    }

    /// Agent type under which the output is reported.
    fn agent_type(&self) -> AgentType {
        AgentType::Smart
    }

    /// Interval between two collections, as configured at construction.
    fn collect_interval(&self) -> Duration {
        self.interval
    }

    /// Whether the collector was configured as enabled.
    fn is_enabled(&self) -> bool {
        self.enabled
    }

    /// Always `true`: smartctl typically requires root access.
    fn requires_root(&self) -> bool {
        true
    }

    /// Collects SMART data for every configured device and summarises it.
    ///
    /// Per-device failures do not abort the collection: the device is counted
    /// as a warning and the error text is added to `issues`. The overall
    /// status is `CRITICAL` if any drive reports a failed SMART status,
    /// `WARNING` if any drive is unreadable or has an unknown status, and
    /// `HEALTHY` otherwise.
    ///
    /// # Errors
    ///
    /// Returns the error from the root filesystem usage lookup if that fails.
    /// NOTE(review): this discards the drive data that was already collected.
    async fn collect(&self) -> Result<CollectorOutput, CollectorError> {
        let mut drives = Vec::new();
        let mut issues = Vec::new();
        let mut healthy = 0;
        let mut warning = 0;
        let mut critical = 0;

        // Collect data from all configured devices.
        for device in &self.devices {
            match self.get_smart_data(device).await {
                Ok(mut drive_data) => {
                    // Capacity and usage are best effort; missing values stay `None`.
                    if let Ok((capacity, usage)) = self.get_drive_usage(device).await {
                        drive_data.capacity_gb = capacity;
                        drive_data.used_gb = usage;
                    }

                    match drive_data.health_status.as_str() {
                        "PASSED" => healthy += 1,
                        "FAILED" => {
                            critical += 1;
                            issues.push(format!("{}: SMART status FAILED", device));
                        }
                        _ => {
                            warning += 1;
                            issues.push(format!("{}: Unknown SMART status", device));
                        }
                    }

                    drives.push(drive_data);
                }
                Err(e) => {
                    warning += 1;
                    issues.push(format!("{}: {}", device, e));
                }
            }
        }

        // Root filesystem usage feeds the capacity figures of the summary.
        let disk_usage = self.get_disk_usage().await?;

        let status = if critical > 0 {
            "CRITICAL"
        } else if warning > 0 {
            "WARNING"
        } else {
            "HEALTHY"
        };

        // Capture the time once so the payload and the envelope always agree.
        let timestamp = Utc::now();

        let smart_metrics = json!({
            "status": status,
            "drives": drives,
            "summary": {
                "healthy": healthy,
                "warning": warning,
                "critical": critical,
                "capacity_total_gb": disk_usage.total_gb,
                "capacity_used_gb": disk_usage.used_gb,
                "capacity_available_gb": disk_usage.available_gb
            },
            "issues": issues,
            "timestamp": timestamp
        });

        Ok(CollectorOutput {
            agent_type: AgentType::Smart,
            data: smart_metrics,
            timestamp,
        })
    }
}
/// Per-drive record that is serialized into the `drives` array of the collector output.
#[derive(Debug, Clone, Serialize)]
struct SmartDeviceData {
/// Device name as passed to the collector (no `/dev/` prefix).
name: String,
/// Current temperature from the smartctl report; 0.0 when the report has none.
temperature_c: f32,
/// NVMe `percentage_used` value; 0.0 when the report has none.
wear_level: f32,
/// Power-on hours from the smartctl report; 0 when the report has none.
power_on_hours: u64,
/// NVMe `available_spare` value; 100.0 when the report has none.
available_spare: f32,
/// "PASSED", "FAILED", or "UNKNOWN" when the report carries no SMART status.
health_status: String,
/// Drive capacity; left as `None` by the constructor and filled in by the collector.
capacity_gb: Option<f32>,
/// Used space; left as `None` by the constructor and filled in by the collector.
used_gb: Option<f32>,
}
impl SmartDeviceData {
fn from_smartctl_output(device: &str, output: SmartCtlOutput) -> Self {
let temperature_c = output.temperature.and_then(|t| t.current).unwrap_or(0.0);
let wear_level = output
.nvme_smart_health_information_log
.as_ref()
.and_then(|nvme| nvme.percentage_used)
.unwrap_or(0.0);
let power_on_hours = output.power_on_time.and_then(|p| p.hours).unwrap_or(0);
let available_spare = output
.nvme_smart_health_information_log
.as_ref()
.and_then(|nvme| nvme.available_spare)
.unwrap_or(100.0);
let health_status = output
.smart_status
.and_then(|s| s.passed)
.map(|passed| {
if passed {
"PASSED".to_string()
} else {
"FAILED".to_string()
}
})
.unwrap_or_else(|| "UNKNOWN".to_string());
Self {
name: device.to_string(),
temperature_c,
wear_level,
power_on_hours,
available_spare,
health_status,
capacity_gb: None, // Will be set later by the collector
used_gb: None, // Will be set later by the collector
}
}
}
/// Root filesystem usage as reported by `df -BG --output=size,used,avail /`.
#[derive(Debug, Clone)]
struct DiskUsage {
/// Value of df's `size` column with the trailing `G` removed.
total_gb: f32,
/// Value of df's `used` column with the trailing `G` removed.
used_gb: f32,
/// Value of df's `avail` column with the trailing `G` removed.
available_gb: f32,
}
// Minimal smartctl JSON output structure - only the fields we need.
// Every section is optional, so a report that lacks one still deserializes.
#[derive(Debug, Deserialize)]
struct SmartCtlOutput {
// Source of `temperature_c`.
temperature: Option<Temperature>,
// Source of `power_on_hours`.
power_on_time: Option<PowerOnTime>,
// Source of `health_status`.
smart_status: Option<SmartStatus>,
// Source of `wear_level` and `available_spare`.
nvme_smart_health_information_log: Option<NvmeSmartLog>,
}
// `temperature` section of the smartctl JSON report.
#[derive(Debug, Deserialize)]
struct Temperature {
// Current reading; stored as `temperature_c` on the drive record.
current: Option<f32>,
}
// `power_on_time` section of the smartctl JSON report.
#[derive(Debug, Deserialize)]
struct PowerOnTime {
// Stored as `power_on_hours` on the drive record.
hours: Option<u64>,
}
// `smart_status` section of the smartctl JSON report.
#[derive(Debug, Deserialize)]
struct SmartStatus {
// `true` maps to "PASSED", `false` to "FAILED", absent to "UNKNOWN".
passed: Option<bool>,
}
// `nvme_smart_health_information_log` section of the smartctl JSON report.
#[derive(Debug, Deserialize)]
struct NvmeSmartLog {
// Stored as `wear_level` on the drive record.
percentage_used: Option<f32>,
// Stored as `available_spare` on the drive record.
available_spare: Option<f32>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks gigabyte, terabyte and megabyte inputs (including the comma
    /// decimal separator) as well as rejection of malformed sizes.
    #[test]
    fn test_parse_lsblk_size() {
        let collector = SmartCollector::new(true, 5000, vec![]);

        // (input, expected size in gigabytes)
        let accepted: [(&str, f32); 5] = [
            ("953,9G", 953.9),
            ("1G", 1.0),
            ("1T", 1024.0),
            ("2,5T", 2560.0),
            ("512M", 0.5),
        ];
        for &(input, expected_gb) in accepted.iter() {
            let parsed = collector.parse_lsblk_size(input).unwrap();
            assert!((parsed - expected_gb).abs() < 0.1);
        }

        // Inputs without a number or with an unknown unit must be rejected.
        let rejected = ["invalid", "1X"];
        for input in rejected.iter() {
            assert!(collector.parse_lsblk_size(*input).is_err());
        }
    }
}