use async_trait::async_trait; use chrono::Utc; use cm_dashboard_shared::{Metric, MetricValue, Status, StatusTracker}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use tokio::fs; use super::{Collector, CollectorError}; use tracing::error; /// Backup collector that reads TOML status files for borgbackup metrics #[derive(Debug, Clone)] pub struct BackupCollector { pub backup_status_file: String, pub max_age_hours: u64, } impl BackupCollector { pub fn new(backup_status_file: Option, max_age_hours: u64) -> Self { Self { backup_status_file: backup_status_file .unwrap_or_else(|| "/var/lib/backup/backup-status.toml".to_string()), max_age_hours, } } async fn read_backup_status(&self) -> Result, CollectorError> { // Check if we're in maintenance mode if std::fs::metadata("/tmp/cm-maintenance").is_ok() { // Return special maintenance mode status let maintenance_status = BackupStatusToml { backup_name: "maintenance".to_string(), start_time: chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC").to_string(), current_time: chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC").to_string(), duration_seconds: 0, status: "pending".to_string(), last_updated: chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC").to_string(), disk_space: None, disk_product_name: None, disk_serial_number: None, disk_wear_percent: None, services: HashMap::new(), }; return Ok(Some(maintenance_status)); } // Check if backup status file exists if !std::path::Path::new(&self.backup_status_file).exists() { return Ok(None); // File doesn't exist, but this is not an error } let content = fs::read_to_string(&self.backup_status_file) .await .map_err(|e| CollectorError::SystemRead { path: self.backup_status_file.clone(), error: e.to_string(), })?; let backup_status = toml::from_str(&content).map_err(|e| CollectorError::Parse { value: "backup status TOML".to_string(), error: e.to_string(), })?; Ok(Some(backup_status)) } fn calculate_backup_status(&self, backup_status: &BackupStatusToml) -> Status { // Parse the start time to check age - handle both RFC3339 and local timestamp formats let start_time = match chrono::DateTime::parse_from_rfc3339(&backup_status.start_time) { Ok(dt) => dt.with_timezone(&Utc), Err(_) => { // Try parsing as naive datetime and assume UTC match chrono::NaiveDateTime::parse_from_str( &backup_status.start_time, "%Y-%m-%dT%H:%M:%S%.f", ) { Ok(naive_dt) => naive_dt.and_utc(), Err(_) => { error!( "Failed to parse backup timestamp: {}", backup_status.start_time ); return Status::Unknown; } } } }; let hours_since_backup = Utc::now().signed_duration_since(start_time).num_hours(); // Check overall backup status match backup_status.status.as_str() { "success" => { if hours_since_backup > self.max_age_hours as i64 { Status::Warning // Backup too old } else { Status::Ok } } "failed" => Status::Critical, "warning" => Status::Warning, // Backup completed with warnings "running" => Status::Ok, // Currently running is OK "pending" => Status::Pending, // Maintenance mode or backup starting _ => Status::Unknown, } } fn calculate_service_status(&self, service: &ServiceStatus) -> Status { match service.status.as_str() { "completed" => { if service.exit_code == 0 { Status::Ok } else { Status::Critical } } "failed" => Status::Critical, "disabled" => Status::Warning, // Service intentionally disabled "running" => Status::Ok, _ => Status::Unknown, } } fn bytes_to_gb(bytes: u64) -> f32 { bytes as f32 / (1024.0 * 1024.0 * 1024.0) } } #[async_trait] impl Collector for BackupCollector { async fn collect(&self, _status_tracker: &mut StatusTracker) -> Result, CollectorError> { let backup_status_option = self.read_backup_status().await?; let mut metrics = Vec::new(); let timestamp = chrono::Utc::now().timestamp() as u64; // If no backup status file exists, return minimal metrics indicating no backup system let backup_status = match backup_status_option { Some(status) => status, None => { // No backup system configured - return minimal "unknown" metrics metrics.push(Metric { name: "backup_overall_status".to_string(), value: MetricValue::String("no_backup_system".to_string()), status: Status::Unknown, timestamp, description: Some("No backup system configured (no status file found)".to_string()), unit: None, }); return Ok(metrics); } }; // Overall backup status let overall_status = self.calculate_backup_status(&backup_status); metrics.push(Metric { name: "backup_overall_status".to_string(), value: MetricValue::String(match overall_status { Status::Ok => "ok".to_string(), Status::Inactive => "inactive".to_string(), Status::Pending => "pending".to_string(), Status::Warning => "warning".to_string(), Status::Critical => "critical".to_string(), Status::Unknown => "unknown".to_string(), Status::Offline => "offline".to_string(), }), status: overall_status, timestamp, description: Some(format!( "Backup: {} at {}", backup_status.status, backup_status.start_time )), unit: None, }); // Backup duration metrics.push(Metric { name: "backup_duration_seconds".to_string(), value: MetricValue::Integer(backup_status.duration_seconds), status: Status::Ok, timestamp, description: Some("Duration of last backup run".to_string()), unit: Some("seconds".to_string()), }); // Last backup timestamp - use last_updated (when backup finished) instead of start_time let last_updated_dt_result = chrono::DateTime::parse_from_rfc3339(&backup_status.last_updated) .map(|dt| dt.with_timezone(&Utc)) .or_else(|_| { // Try parsing as naive datetime and assume UTC chrono::NaiveDateTime::parse_from_str( &backup_status.last_updated, "%Y-%m-%dT%H:%M:%S%.f", ) .map(|naive_dt| naive_dt.and_utc()) }); if let Ok(last_updated_dt) = last_updated_dt_result { metrics.push(Metric { name: "backup_last_run_timestamp".to_string(), value: MetricValue::Integer(last_updated_dt.timestamp()), status: Status::Ok, timestamp, description: Some("Timestamp of last backup completion".to_string()), unit: Some("unix_timestamp".to_string()), }); } else { error!( "Failed to parse backup timestamp for last_run_timestamp: {}", backup_status.last_updated ); } // Individual service metrics for (service_name, service) in &backup_status.services { let service_status = self.calculate_service_status(service); // Service status metrics.push(Metric { name: format!("backup_service_{}_status", service_name), value: MetricValue::String(match service_status { Status::Ok => "ok".to_string(), Status::Inactive => "inactive".to_string(), Status::Pending => "pending".to_string(), Status::Warning => "warning".to_string(), Status::Critical => "critical".to_string(), Status::Unknown => "unknown".to_string(), Status::Offline => "offline".to_string(), }), status: service_status, timestamp, description: Some(format!( "Backup service {} status: {}", service_name, service.status )), unit: None, }); // Service exit code metrics.push(Metric { name: format!("backup_service_{}_exit_code", service_name), value: MetricValue::Integer(service.exit_code), status: if service.exit_code == 0 { Status::Ok } else { Status::Critical }, timestamp, description: Some(format!("Exit code for backup service {}", service_name)), unit: None, }); // Repository archive count metrics.push(Metric { name: format!("backup_service_{}_archive_count", service_name), value: MetricValue::Integer(service.archive_count), status: Status::Ok, timestamp, description: Some(format!("Number of archives in {} repository", service_name)), unit: Some("archives".to_string()), }); // Repository size in GB let repo_size_gb = Self::bytes_to_gb(service.repo_size_bytes); metrics.push(Metric { name: format!("backup_service_{}_repo_size_gb", service_name), value: MetricValue::Float(repo_size_gb), status: Status::Ok, timestamp, description: Some(format!("Repository size for {} in GB", service_name)), unit: Some("GB".to_string()), }); // Repository path for reference metrics.push(Metric { name: format!("backup_service_{}_repo_path", service_name), value: MetricValue::String(service.repo_path.clone()), status: Status::Ok, timestamp, description: Some(format!("Repository path for {}", service_name)), unit: None, }); } // Total number of services metrics.push(Metric { name: "backup_total_services".to_string(), value: MetricValue::Integer(backup_status.services.len() as i64), status: Status::Ok, timestamp, description: Some("Total number of backup services".to_string()), unit: Some("services".to_string()), }); // Calculate total repository size let total_size_bytes: u64 = backup_status .services .values() .map(|s| s.repo_size_bytes) .sum(); let total_size_gb = Self::bytes_to_gb(total_size_bytes); metrics.push(Metric { name: "backup_total_repo_size_gb".to_string(), value: MetricValue::Float(total_size_gb), status: Status::Ok, timestamp, description: Some("Total size of all backup repositories".to_string()), unit: Some("GB".to_string()), }); // Disk space metrics for backup directory if let Some(ref disk_space) = backup_status.disk_space { metrics.push(Metric { name: "backup_disk_total_gb".to_string(), value: MetricValue::Float(disk_space.total_gb as f32), status: Status::Ok, timestamp, description: Some("Total disk space available for backups".to_string()), unit: Some("GB".to_string()), }); metrics.push(Metric { name: "backup_disk_used_gb".to_string(), value: MetricValue::Float(disk_space.used_gb as f32), status: Status::Ok, timestamp, description: Some("Used disk space on backup drive".to_string()), unit: Some("GB".to_string()), }); metrics.push(Metric { name: "backup_disk_available_gb".to_string(), value: MetricValue::Float(disk_space.available_gb as f32), status: Status::Ok, timestamp, description: Some("Available disk space on backup drive".to_string()), unit: Some("GB".to_string()), }); metrics.push(Metric { name: "backup_disk_usage_percent".to_string(), value: MetricValue::Float(disk_space.usage_percent as f32), status: if disk_space.usage_percent >= 95.0 { Status::Critical } else if disk_space.usage_percent >= 85.0 { Status::Warning } else { Status::Ok }, timestamp, description: Some("Backup disk usage percentage".to_string()), unit: Some("percent".to_string()), }); // Add disk identification metrics if available from disk_space if let Some(ref product_name) = disk_space.product_name { metrics.push(Metric { name: "backup_disk_product_name".to_string(), value: MetricValue::String(product_name.clone()), status: Status::Ok, timestamp, description: Some("Backup disk product name from SMART data".to_string()), unit: None, }); } if let Some(ref serial_number) = disk_space.serial_number { metrics.push(Metric { name: "backup_disk_serial_number".to_string(), value: MetricValue::String(serial_number.clone()), status: Status::Ok, timestamp, description: Some("Backup disk serial number from SMART data".to_string()), unit: None, }); } } // Add standalone disk identification metrics from TOML fields if let Some(ref product_name) = backup_status.disk_product_name { metrics.push(Metric { name: "backup_disk_product_name".to_string(), value: MetricValue::String(product_name.clone()), status: Status::Ok, timestamp, description: Some("Backup disk product name from SMART data".to_string()), unit: None, }); } if let Some(ref serial_number) = backup_status.disk_serial_number { metrics.push(Metric { name: "backup_disk_serial_number".to_string(), value: MetricValue::String(serial_number.clone()), status: Status::Ok, timestamp, description: Some("Backup disk serial number from SMART data".to_string()), unit: None, }); } if let Some(wear_percent) = backup_status.disk_wear_percent { let wear_status = if wear_percent >= 90.0 { Status::Critical } else if wear_percent >= 75.0 { Status::Warning } else { Status::Ok }; metrics.push(Metric { name: "backup_disk_wear_percent".to_string(), value: MetricValue::Float(wear_percent), status: wear_status, timestamp, description: Some("Backup disk wear percentage from SMART data".to_string()), unit: Some("percent".to_string()), }); } // Count services by status let mut status_counts = HashMap::new(); for service in backup_status.services.values() { *status_counts.entry(service.status.clone()).or_insert(0) += 1; } for (status_name, count) in status_counts { metrics.push(Metric { name: format!("backup_services_{}_count", status_name), value: MetricValue::Integer(count), status: Status::Ok, timestamp, description: Some(format!("Number of services with status: {}", status_name)), unit: Some("services".to_string()), }); } Ok(metrics) } } /// TOML structure for backup status file #[derive(Debug, Clone, Deserialize, Serialize)] pub struct BackupStatusToml { pub backup_name: String, pub start_time: String, pub current_time: String, pub duration_seconds: i64, pub status: String, pub last_updated: String, pub disk_space: Option, pub disk_product_name: Option, pub disk_serial_number: Option, pub disk_wear_percent: Option, pub services: HashMap, } #[derive(Debug, Clone, Deserialize, Serialize)] pub struct DiskSpace { pub total_bytes: u64, pub used_bytes: u64, pub available_bytes: u64, pub total_gb: f64, pub used_gb: f64, pub available_gb: f64, pub usage_percent: f64, // Optional disk identification fields pub product_name: Option, pub serial_number: Option, } #[derive(Debug, Clone, Deserialize, Serialize)] pub struct ServiceStatus { pub status: String, pub exit_code: i64, pub repo_path: String, pub archive_count: i64, pub repo_size_bytes: u64, }