use async_trait::async_trait;
use cm_dashboard_shared::{Metric, MetricValue, Status};
use chrono::Utc;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use tokio::fs;

use super::{Collector, CollectorError};
use tracing::error;

/// Backup collector that reads TOML status files for borgbackup metrics
#[derive(Debug, Clone)]
pub struct BackupCollector {
    pub backup_status_file: String,
    pub max_age_hours: u64,
}

impl BackupCollector {
    pub fn new(backup_status_file: Option<String>, max_age_hours: u64) -> Self {
        Self {
            backup_status_file: backup_status_file
                .unwrap_or_else(|| "/var/lib/backup/backup-status.toml".to_string()),
            max_age_hours,
        }
    }

    async fn read_backup_status(&self) -> Result<BackupStatusToml, CollectorError> {
        let content = fs::read_to_string(&self.backup_status_file)
            .await
            .map_err(|e| CollectorError::SystemRead {
                path: self.backup_status_file.clone(),
                error: e.to_string(),
            })?;
        toml::from_str(&content).map_err(|e| CollectorError::Parse {
            value: "backup status TOML".to_string(),
            error: e.to_string(),
        })
    }

    fn calculate_backup_status(&self, backup_status: &BackupStatusToml) -> Status {
        // Parse the start time to check age - handle both RFC3339 and local timestamp formats
        let start_time = match chrono::DateTime::parse_from_rfc3339(&backup_status.start_time) {
            Ok(dt) => dt.with_timezone(&Utc),
            Err(_) => {
                // Try parsing as naive datetime and assume UTC
                match chrono::NaiveDateTime::parse_from_str(
                    &backup_status.start_time,
                    "%Y-%m-%dT%H:%M:%S%.f",
                ) {
                    Ok(naive_dt) => naive_dt.and_utc(),
                    Err(_) => {
                        error!(
                            "Failed to parse backup timestamp: {}",
                            backup_status.start_time
                        );
                        return Status::Unknown;
                    }
                }
            }
        };

        let hours_since_backup = Utc::now().signed_duration_since(start_time).num_hours();

        // Check overall backup status
        match backup_status.status.as_str() {
            "success" => {
                if hours_since_backup > self.max_age_hours as i64 {
                    Status::Warning // Backup too old
                } else {
                    Status::Ok
                }
            }
            "failed" => Status::Critical,
            "running" => Status::Ok, // Currently running is OK
            _ => Status::Unknown,
        }
    }

    fn calculate_service_status(&self, service: &ServiceStatus) -> Status {
        match service.status.as_str() {
            "completed" => {
                if service.exit_code == 0 {
                    Status::Ok
                } else {
                    Status::Critical
                }
            }
            "failed" => Status::Critical,
            "disabled" => Status::Warning, // Service intentionally disabled
            "running" => Status::Ok,
            _ => Status::Unknown,
        }
    }

    fn bytes_to_gb(bytes: u64) -> f32 {
        bytes as f32 / (1024.0 * 1024.0 * 1024.0)
    }
}

#[async_trait]
impl Collector for BackupCollector {
    fn name(&self) -> &str {
        "backup"
    }

    async fn collect(&self) -> Result<Vec<Metric>, CollectorError> {
        let backup_status = self.read_backup_status().await?;
        let mut metrics = Vec::new();
        let timestamp = chrono::Utc::now().timestamp() as u64;

        // Overall backup status
        let overall_status = self.calculate_backup_status(&backup_status);
        metrics.push(Metric {
            name: "backup_overall_status".to_string(),
            value: MetricValue::String(match overall_status {
                Status::Ok => "ok".to_string(),
                Status::Warning => "warning".to_string(),
                Status::Critical => "critical".to_string(),
                Status::Unknown => "unknown".to_string(),
            }),
            status: overall_status,
            timestamp,
            description: Some(format!(
                "Backup: {} at {}",
                backup_status.status, backup_status.start_time
            )),
            unit: None,
        });

        // Backup duration
        metrics.push(Metric {
            name: "backup_duration_seconds".to_string(),
            value: MetricValue::Integer(backup_status.duration_seconds),
            status: Status::Ok,
            timestamp,
            description: Some("Duration of last backup run".to_string()),
            unit: Some("seconds".to_string()),
        });

        // Last backup timestamp - use last_updated (when the backup finished) instead of start_time
        let last_updated_dt_result =
            chrono::DateTime::parse_from_rfc3339(&backup_status.last_updated)
                .map(|dt| dt.with_timezone(&Utc))
                .or_else(|_| {
                    // Try parsing as naive datetime and assume UTC
                    chrono::NaiveDateTime::parse_from_str(
                        &backup_status.last_updated,
                        "%Y-%m-%dT%H:%M:%S%.f",
                    )
                    .map(|naive_dt| naive_dt.and_utc())
                });

        if let Ok(last_updated_dt) = last_updated_dt_result {
            metrics.push(Metric {
                name: "backup_last_run_timestamp".to_string(),
                value: MetricValue::Integer(last_updated_dt.timestamp()),
                status: Status::Ok,
                timestamp,
                description: Some("Timestamp of last backup completion".to_string()),
                unit: Some("unix_timestamp".to_string()),
            });
        } else {
            error!(
                "Failed to parse backup timestamp for last_run_timestamp: {}",
                backup_status.last_updated
            );
        }

        // Individual service metrics
        for (service_name, service) in &backup_status.services {
            let service_status = self.calculate_service_status(service);

            // Service status
            metrics.push(Metric {
                name: format!("backup_service_{}_status", service_name),
                value: MetricValue::String(match service_status {
                    Status::Ok => "ok".to_string(),
                    Status::Warning => "warning".to_string(),
                    Status::Critical => "critical".to_string(),
                    Status::Unknown => "unknown".to_string(),
                }),
                status: service_status,
                timestamp,
                description: Some(format!(
                    "Backup service {} status: {}",
                    service_name, service.status
                )),
                unit: None,
            });

            // Service exit code
            metrics.push(Metric {
                name: format!("backup_service_{}_exit_code", service_name),
                value: MetricValue::Integer(service.exit_code),
                status: if service.exit_code == 0 {
                    Status::Ok
                } else {
                    Status::Critical
                },
                timestamp,
                description: Some(format!("Exit code for backup service {}", service_name)),
                unit: None,
            });

            // Repository archive count
            metrics.push(Metric {
                name: format!("backup_service_{}_archive_count", service_name),
                value: MetricValue::Integer(service.archive_count),
                status: Status::Ok,
                timestamp,
                description: Some(format!("Number of archives in {} repository", service_name)),
                unit: Some("archives".to_string()),
            });

            // Repository size in GB
            let repo_size_gb = Self::bytes_to_gb(service.repo_size_bytes);
            metrics.push(Metric {
                name: format!("backup_service_{}_repo_size_gb", service_name),
                value: MetricValue::Float(repo_size_gb),
                status: Status::Ok,
                timestamp,
                description: Some(format!("Repository size for {} in GB", service_name)),
                unit: Some("GB".to_string()),
            });

            // Repository path for reference
            metrics.push(Metric {
                name: format!("backup_service_{}_repo_path", service_name),
                value: MetricValue::String(service.repo_path.clone()),
                status: Status::Ok,
                timestamp,
                description: Some(format!("Repository path for {}", service_name)),
                unit: None,
            });
        }

        // Total number of services
        metrics.push(Metric {
            name: "backup_total_services".to_string(),
            value: MetricValue::Integer(backup_status.services.len() as i64),
            status: Status::Ok,
            timestamp,
            description: Some("Total number of backup services".to_string()),
            unit: Some("services".to_string()),
        });

        // Calculate total repository size
        let total_size_bytes: u64 = backup_status
            .services
            .values()
            .map(|s| s.repo_size_bytes)
            .sum();
        let total_size_gb = Self::bytes_to_gb(total_size_bytes);
        metrics.push(Metric {
            name: "backup_total_repo_size_gb".to_string(),
            value: MetricValue::Float(total_size_gb),
            status: Status::Ok,
            timestamp,
            description: Some("Total size of all backup repositories".to_string()),
            unit: Some("GB".to_string()),
        });

        // Disk space metrics for backup directory
        if let Some(ref disk_space) = backup_status.disk_space {
"backup_disk_total_gb".to_string(), value: MetricValue::Float(disk_space.total_gb as f32), status: Status::Ok, timestamp, description: Some("Total disk space available for backups".to_string()), unit: Some("GB".to_string()), }); metrics.push(Metric { name: "backup_disk_used_gb".to_string(), value: MetricValue::Float(disk_space.used_gb as f32), status: Status::Ok, timestamp, description: Some("Used disk space on backup drive".to_string()), unit: Some("GB".to_string()), }); metrics.push(Metric { name: "backup_disk_available_gb".to_string(), value: MetricValue::Float(disk_space.available_gb as f32), status: Status::Ok, timestamp, description: Some("Available disk space on backup drive".to_string()), unit: Some("GB".to_string()), }); metrics.push(Metric { name: "backup_disk_usage_percent".to_string(), value: MetricValue::Float(disk_space.usage_percent as f32), status: if disk_space.usage_percent >= 95.0 { Status::Critical } else if disk_space.usage_percent >= 85.0 { Status::Warning } else { Status::Ok }, timestamp, description: Some("Backup disk usage percentage".to_string()), unit: Some("percent".to_string()), }); // Add disk identification metrics if available from disk_space if let Some(ref product_name) = disk_space.product_name { metrics.push(Metric { name: "backup_disk_product_name".to_string(), value: MetricValue::String(product_name.clone()), status: Status::Ok, timestamp, description: Some("Backup disk product name from SMART data".to_string()), unit: None, }); } if let Some(ref serial_number) = disk_space.serial_number { metrics.push(Metric { name: "backup_disk_serial_number".to_string(), value: MetricValue::String(serial_number.clone()), status: Status::Ok, timestamp, description: Some("Backup disk serial number from SMART data".to_string()), unit: None, }); } } // Add standalone disk identification metrics from TOML fields if let Some(ref product_name) = backup_status.disk_product_name { metrics.push(Metric { name: "backup_disk_product_name".to_string(), value: MetricValue::String(product_name.clone()), status: Status::Ok, timestamp, description: Some("Backup disk product name from SMART data".to_string()), unit: None, }); } if let Some(ref serial_number) = backup_status.disk_serial_number { metrics.push(Metric { name: "backup_disk_serial_number".to_string(), value: MetricValue::String(serial_number.clone()), status: Status::Ok, timestamp, description: Some("Backup disk serial number from SMART data".to_string()), unit: None, }); } // Count services by status let mut status_counts = HashMap::new(); for service in backup_status.services.values() { *status_counts.entry(service.status.clone()).or_insert(0) += 1; } for (status_name, count) in status_counts { metrics.push(Metric { name: format!("backup_services_{}_count", status_name), value: MetricValue::Integer(count), status: Status::Ok, timestamp, description: Some(format!("Number of services with status: {}", status_name)), unit: Some("services".to_string()), }); } Ok(metrics) } } /// TOML structure for backup status file #[derive(Debug, Clone, Deserialize, Serialize)] pub struct BackupStatusToml { pub backup_name: String, pub start_time: String, pub current_time: String, pub duration_seconds: i64, pub status: String, pub last_updated: String, pub disk_space: Option, pub disk_product_name: Option, pub disk_serial_number: Option, pub services: HashMap, } #[derive(Debug, Clone, Deserialize, Serialize)] pub struct DiskSpace { pub total_bytes: u64, pub used_bytes: u64, pub available_bytes: u64, pub total_gb: f64, pub used_gb: f64, pub 
    pub available_gb: f64,
    pub usage_percent: f64,
    // Optional disk identification fields
    pub product_name: Option<String>,
    pub serial_number: Option<String>,
}

#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ServiceStatus {
    pub status: String,
    pub exit_code: i64,
    pub repo_path: String,
    pub archive_count: i64,
    pub repo_size_bytes: u64,
}
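
// A minimal test sketch illustrating the status-file format this collector
// expects. The field names mirror BackupStatusToml and ServiceStatus above;
// the concrete values (service name, paths, sizes, timestamps) are
// hypothetical examples, not taken from a real backup run.
#[cfg(test)]
mod tests {
    use super::*;

    // Example TOML matching the structs above; all values are illustrative.
    const EXAMPLE_STATUS: &str = r#"
backup_name = "nightly"
start_time = "2024-01-01T02:00:00+00:00"
current_time = "2024-01-01T02:05:30+00:00"
duration_seconds = 330
status = "success"
last_updated = "2024-01-01T02:05:30+00:00"

[services.postgres]
status = "completed"
exit_code = 0
repo_path = "/var/lib/backup/postgres"
archive_count = 14
repo_size_bytes = 1073741824
"#;

    #[test]
    fn parses_example_status_file() {
        let parsed: BackupStatusToml =
            toml::from_str(EXAMPLE_STATUS).expect("example TOML should parse");
        assert_eq!(parsed.status, "success");
        assert_eq!(parsed.services.len(), 1);
        // Optional disk fields may be omitted entirely; serde defaults them to None.
        assert!(parsed.disk_space.is_none());
        assert!(parsed.disk_product_name.is_none());
    }

    #[test]
    fn completed_service_with_zero_exit_code_is_ok() {
        // None falls back to the default status file path; 24 is max_age_hours.
        let collector = BackupCollector::new(None, 24);
        let service = ServiceStatus {
            status: "completed".to_string(),
            exit_code: 0,
            repo_path: "/var/lib/backup/postgres".to_string(),
            archive_count: 14,
            repo_size_bytes: 1024,
        };
        // matches! avoids requiring PartialEq/Debug on the shared Status type.
        assert!(matches!(
            collector.calculate_service_status(&service),
            Status::Ok
        ));
    }

    #[test]
    fn bytes_to_gb_converts_one_gib() {
        // 1 GiB = 1024^3 bytes should convert to 1.0 GB in this collector's scheme.
        assert!((BackupCollector::bytes_to_gb(1_073_741_824) - 1.0).abs() < 1e-6);
    }
}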