- Remove unused imports (Duration, HashMap, SharedError, DateTime, etc.) - Fix unused variables by prefixing with underscore - Remove redundant dashboard.toml config file - Update theme imports to use only needed components - Maintain all functionality while reducing warnings - Add srv02 to predefined hosts configuration - Remove unused broadcast_command methods
387 lines
15 KiB
Rust
387 lines
15 KiB
Rust
use async_trait::async_trait;
|
|
use cm_dashboard_shared::{Metric, MetricValue, Status};
|
|
use chrono::Utc;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::collections::HashMap;
|
|
use tokio::fs;
|
|
|
|
use super::{Collector, CollectorError};
|
|
use tracing::error;
|
|
|
|
/// Backup collector that reads TOML status files for borgbackup metrics
#[derive(Debug, Clone)]
pub struct BackupCollector {
    /// Path to the backup status TOML file
    /// (defaults to /var/lib/backup/backup-status.toml in `new`).
    pub backup_status_file: String,
    /// A "success" backup older than this many hours is downgraded to Warning.
    pub max_age_hours: u64,
}
|
|
|
|
impl BackupCollector {
|
|
pub fn new(backup_status_file: Option<String>, max_age_hours: u64) -> Self {
|
|
Self {
|
|
backup_status_file: backup_status_file.unwrap_or_else(|| "/var/lib/backup/backup-status.toml".to_string()),
|
|
max_age_hours,
|
|
}
|
|
}
|
|
|
|
async fn read_backup_status(&self) -> Result<BackupStatusToml, CollectorError> {
|
|
let content = fs::read_to_string(&self.backup_status_file)
|
|
.await
|
|
.map_err(|e| CollectorError::SystemRead {
|
|
path: self.backup_status_file.clone(),
|
|
error: e.to_string(),
|
|
})?;
|
|
|
|
toml::from_str(&content).map_err(|e| CollectorError::Parse {
|
|
value: "backup status TOML".to_string(),
|
|
error: e.to_string(),
|
|
})
|
|
}
|
|
|
|
fn calculate_backup_status(&self, backup_status: &BackupStatusToml) -> Status {
|
|
// Parse the start time to check age - handle both RFC3339 and local timestamp formats
|
|
let start_time = match chrono::DateTime::parse_from_rfc3339(&backup_status.start_time) {
|
|
Ok(dt) => dt.with_timezone(&Utc),
|
|
Err(_) => {
|
|
// Try parsing as naive datetime and assume UTC
|
|
match chrono::NaiveDateTime::parse_from_str(&backup_status.start_time, "%Y-%m-%dT%H:%M:%S%.f") {
|
|
Ok(naive_dt) => naive_dt.and_utc(),
|
|
Err(_) => {
|
|
error!("Failed to parse backup timestamp: {}", backup_status.start_time);
|
|
return Status::Unknown;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
let hours_since_backup = Utc::now().signed_duration_since(start_time).num_hours();
|
|
|
|
// Check overall backup status
|
|
match backup_status.status.as_str() {
|
|
"success" => {
|
|
if hours_since_backup > self.max_age_hours as i64 {
|
|
Status::Warning // Backup too old
|
|
} else {
|
|
Status::Ok
|
|
}
|
|
},
|
|
"failed" => Status::Critical,
|
|
"running" => Status::Ok, // Currently running is OK
|
|
_ => Status::Unknown,
|
|
}
|
|
}
|
|
|
|
fn calculate_service_status(&self, service: &ServiceStatus) -> Status {
|
|
match service.status.as_str() {
|
|
"completed" => {
|
|
if service.exit_code == 0 {
|
|
Status::Ok
|
|
} else {
|
|
Status::Critical
|
|
}
|
|
},
|
|
"failed" => Status::Critical,
|
|
"disabled" => Status::Warning, // Service intentionally disabled
|
|
"running" => Status::Ok,
|
|
_ => Status::Unknown,
|
|
}
|
|
}
|
|
|
|
fn bytes_to_gb(bytes: u64) -> f32 {
|
|
bytes as f32 / (1024.0 * 1024.0 * 1024.0)
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Collector for BackupCollector {
|
|
fn name(&self) -> &str {
|
|
"backup"
|
|
}
|
|
|
|
async fn collect(&self) -> Result<Vec<Metric>, CollectorError> {
|
|
let backup_status = self.read_backup_status().await?;
|
|
let mut metrics = Vec::new();
|
|
let timestamp = chrono::Utc::now().timestamp() as u64;
|
|
|
|
// Overall backup status
|
|
let overall_status = self.calculate_backup_status(&backup_status);
|
|
metrics.push(Metric {
|
|
name: "backup_overall_status".to_string(),
|
|
value: MetricValue::String(match overall_status {
|
|
Status::Ok => "ok".to_string(),
|
|
Status::Warning => "warning".to_string(),
|
|
Status::Critical => "critical".to_string(),
|
|
Status::Unknown => "unknown".to_string(),
|
|
}),
|
|
status: overall_status,
|
|
timestamp,
|
|
description: Some(format!("Backup: {} at {}", backup_status.status, backup_status.start_time)),
|
|
unit: None,
|
|
});
|
|
|
|
// Backup duration
|
|
metrics.push(Metric {
|
|
name: "backup_duration_seconds".to_string(),
|
|
value: MetricValue::Integer(backup_status.duration_seconds),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Duration of last backup run".to_string()),
|
|
unit: Some("seconds".to_string()),
|
|
});
|
|
|
|
// Last backup timestamp - use last_updated (when backup finished) instead of start_time
|
|
let last_updated_dt_result = chrono::DateTime::parse_from_rfc3339(&backup_status.last_updated)
|
|
.map(|dt| dt.with_timezone(&Utc))
|
|
.or_else(|_| {
|
|
// Try parsing as naive datetime and assume UTC
|
|
chrono::NaiveDateTime::parse_from_str(&backup_status.last_updated, "%Y-%m-%dT%H:%M:%S%.f")
|
|
.map(|naive_dt| naive_dt.and_utc())
|
|
});
|
|
|
|
if let Ok(last_updated_dt) = last_updated_dt_result {
|
|
metrics.push(Metric {
|
|
name: "backup_last_run_timestamp".to_string(),
|
|
value: MetricValue::Integer(last_updated_dt.timestamp()),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Timestamp of last backup completion".to_string()),
|
|
unit: Some("unix_timestamp".to_string()),
|
|
});
|
|
} else {
|
|
error!("Failed to parse backup timestamp for last_run_timestamp: {}", backup_status.last_updated);
|
|
}
|
|
|
|
// Individual service metrics
|
|
for (service_name, service) in &backup_status.services {
|
|
let service_status = self.calculate_service_status(service);
|
|
|
|
// Service status
|
|
metrics.push(Metric {
|
|
name: format!("backup_service_{}_status", service_name),
|
|
value: MetricValue::String(match service_status {
|
|
Status::Ok => "ok".to_string(),
|
|
Status::Warning => "warning".to_string(),
|
|
Status::Critical => "critical".to_string(),
|
|
Status::Unknown => "unknown".to_string(),
|
|
}),
|
|
status: service_status,
|
|
timestamp,
|
|
description: Some(format!("Backup service {} status: {}", service_name, service.status)),
|
|
unit: None,
|
|
});
|
|
|
|
// Service exit code
|
|
metrics.push(Metric {
|
|
name: format!("backup_service_{}_exit_code", service_name),
|
|
value: MetricValue::Integer(service.exit_code),
|
|
status: if service.exit_code == 0 { Status::Ok } else { Status::Critical },
|
|
timestamp,
|
|
description: Some(format!("Exit code for backup service {}", service_name)),
|
|
unit: None,
|
|
});
|
|
|
|
// Repository archive count
|
|
metrics.push(Metric {
|
|
name: format!("backup_service_{}_archive_count", service_name),
|
|
value: MetricValue::Integer(service.archive_count),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some(format!("Number of archives in {} repository", service_name)),
|
|
unit: Some("archives".to_string()),
|
|
});
|
|
|
|
// Repository size in GB
|
|
let repo_size_gb = Self::bytes_to_gb(service.repo_size_bytes);
|
|
metrics.push(Metric {
|
|
name: format!("backup_service_{}_repo_size_gb", service_name),
|
|
value: MetricValue::Float(repo_size_gb),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some(format!("Repository size for {} in GB", service_name)),
|
|
unit: Some("GB".to_string()),
|
|
});
|
|
|
|
// Repository path for reference
|
|
metrics.push(Metric {
|
|
name: format!("backup_service_{}_repo_path", service_name),
|
|
value: MetricValue::String(service.repo_path.clone()),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some(format!("Repository path for {}", service_name)),
|
|
unit: None,
|
|
});
|
|
}
|
|
|
|
// Total number of services
|
|
metrics.push(Metric {
|
|
name: "backup_total_services".to_string(),
|
|
value: MetricValue::Integer(backup_status.services.len() as i64),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Total number of backup services".to_string()),
|
|
unit: Some("services".to_string()),
|
|
});
|
|
|
|
// Calculate total repository size
|
|
let total_size_bytes: u64 = backup_status.services.values()
|
|
.map(|s| s.repo_size_bytes)
|
|
.sum();
|
|
let total_size_gb = Self::bytes_to_gb(total_size_bytes);
|
|
metrics.push(Metric {
|
|
name: "backup_total_repo_size_gb".to_string(),
|
|
value: MetricValue::Float(total_size_gb),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Total size of all backup repositories".to_string()),
|
|
unit: Some("GB".to_string()),
|
|
});
|
|
|
|
// Disk space metrics for backup directory
|
|
if let Some(ref disk_space) = backup_status.disk_space {
|
|
metrics.push(Metric {
|
|
name: "backup_disk_total_gb".to_string(),
|
|
value: MetricValue::Float(disk_space.total_gb as f32),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Total disk space available for backups".to_string()),
|
|
unit: Some("GB".to_string()),
|
|
});
|
|
|
|
metrics.push(Metric {
|
|
name: "backup_disk_used_gb".to_string(),
|
|
value: MetricValue::Float(disk_space.used_gb as f32),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Used disk space on backup drive".to_string()),
|
|
unit: Some("GB".to_string()),
|
|
});
|
|
|
|
metrics.push(Metric {
|
|
name: "backup_disk_available_gb".to_string(),
|
|
value: MetricValue::Float(disk_space.available_gb as f32),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Available disk space on backup drive".to_string()),
|
|
unit: Some("GB".to_string()),
|
|
});
|
|
|
|
metrics.push(Metric {
|
|
name: "backup_disk_usage_percent".to_string(),
|
|
value: MetricValue::Float(disk_space.usage_percent as f32),
|
|
status: if disk_space.usage_percent >= 95.0 {
|
|
Status::Critical
|
|
} else if disk_space.usage_percent >= 85.0 {
|
|
Status::Warning
|
|
} else {
|
|
Status::Ok
|
|
},
|
|
timestamp,
|
|
description: Some("Backup disk usage percentage".to_string()),
|
|
unit: Some("percent".to_string()),
|
|
});
|
|
|
|
// Add disk identification metrics if available from disk_space
|
|
if let Some(ref product_name) = disk_space.product_name {
|
|
metrics.push(Metric {
|
|
name: "backup_disk_product_name".to_string(),
|
|
value: MetricValue::String(product_name.clone()),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Backup disk product name from SMART data".to_string()),
|
|
unit: None,
|
|
});
|
|
}
|
|
|
|
if let Some(ref serial_number) = disk_space.serial_number {
|
|
metrics.push(Metric {
|
|
name: "backup_disk_serial_number".to_string(),
|
|
value: MetricValue::String(serial_number.clone()),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Backup disk serial number from SMART data".to_string()),
|
|
unit: None,
|
|
});
|
|
}
|
|
|
|
}
|
|
|
|
// Add standalone disk identification metrics from TOML fields
|
|
if let Some(ref product_name) = backup_status.disk_product_name {
|
|
metrics.push(Metric {
|
|
name: "backup_disk_product_name".to_string(),
|
|
value: MetricValue::String(product_name.clone()),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Backup disk product name from SMART data".to_string()),
|
|
unit: None,
|
|
});
|
|
}
|
|
|
|
if let Some(ref serial_number) = backup_status.disk_serial_number {
|
|
metrics.push(Metric {
|
|
name: "backup_disk_serial_number".to_string(),
|
|
value: MetricValue::String(serial_number.clone()),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Backup disk serial number from SMART data".to_string()),
|
|
unit: None,
|
|
});
|
|
}
|
|
|
|
// Count services by status
|
|
let mut status_counts = HashMap::new();
|
|
for service in backup_status.services.values() {
|
|
*status_counts.entry(service.status.clone()).or_insert(0) += 1;
|
|
}
|
|
|
|
for (status_name, count) in status_counts {
|
|
metrics.push(Metric {
|
|
name: format!("backup_services_{}_count", status_name),
|
|
value: MetricValue::Integer(count),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some(format!("Number of services with status: {}", status_name)),
|
|
unit: Some("services".to_string()),
|
|
});
|
|
}
|
|
|
|
Ok(metrics)
|
|
}
|
|
}
|
|
|
|
/// TOML structure for backup status file
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct BackupStatusToml {
    /// Logical name of this backup job.
    pub backup_name: String,
    /// When the backup run started; RFC 3339 or naive
    /// `%Y-%m-%dT%H:%M:%S%.f` (naive timestamps are treated as UTC).
    pub start_time: String,
    /// Timestamp recorded when the file was written.
    /// NOTE(review): deserialized but not read by this collector — confirm
    /// whether other consumers need it.
    pub current_time: String,
    /// Duration of the last backup run in seconds.
    pub duration_seconds: i64,
    /// Overall run state: "success", "failed", or "running"
    /// (anything else is reported as Unknown).
    pub status: String,
    /// When the status file was last updated; used as the backup
    /// completion time for the last-run-timestamp metric.
    pub last_updated: String,
    /// Optional disk-space snapshot for the backup target volume.
    pub disk_space: Option<DiskSpace>,
    /// Optional disk product name from SMART data (standalone field).
    pub disk_product_name: Option<String>,
    /// Optional disk serial number from SMART data (standalone field).
    pub disk_serial_number: Option<String>,
    /// Per-service backup results keyed by service name.
    pub services: HashMap<String, ServiceStatus>,
}
|
|
|
|
/// Disk-space snapshot for the backup target volume.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DiskSpace {
    // NOTE(review): the *_bytes fields are deserialized but the collector
    // only emits the precomputed *_gb values below.
    pub total_bytes: u64,
    pub used_bytes: u64,
    pub available_bytes: u64,
    /// Total capacity in GB (emitted as backup_disk_total_gb).
    pub total_gb: f64,
    /// Used space in GB (emitted as backup_disk_used_gb).
    pub used_gb: f64,
    /// Free space in GB (emitted as backup_disk_available_gb).
    pub available_gb: f64,
    /// Usage as a percentage; >=85 maps to Warning, >=95 to Critical.
    pub usage_percent: f64,
    // Optional disk identification fields
    /// Disk product name from SMART data, if available.
    pub product_name: Option<String>,
    /// Disk serial number from SMART data, if available.
    pub serial_number: Option<String>,
}
|
|
|
|
/// Per-service backup result as recorded in the status TOML file.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ServiceStatus {
    /// Service state: "completed", "failed", "disabled", or "running"
    /// (anything else is reported as Unknown).
    pub status: String,
    /// Process exit code; non-zero marks the service Critical even when
    /// status is "completed".
    pub exit_code: i64,
    /// Filesystem path of the borg repository for this service.
    pub repo_path: String,
    /// Number of archives currently in the repository.
    pub archive_count: i64,
    /// Repository size in bytes (converted to GB for the metric).
    pub repo_size_bytes: u64,
}