- Remove unused fields from CommandStatus variants
- Clean up unused methods and unused collector fields
- Fix lifetime syntax warning in SystemWidget
- Delete unused cache module completely
- Remove redundant render methods from widgets

All agent and dashboard warnings eliminated while preserving panel switching and scrolling functionality.
436 lines
16 KiB
Rust
436 lines
16 KiB
Rust
use async_trait::async_trait;
|
|
use chrono::Utc;
|
|
use cm_dashboard_shared::{Metric, MetricValue, Status, StatusTracker};
|
|
use serde::{Deserialize, Serialize};
|
|
use std::collections::HashMap;
|
|
use tokio::fs;
|
|
|
|
use super::{Collector, CollectorError};
|
|
use tracing::error;
|
|
|
|
/// Backup collector that reads TOML status files for borgbackup metrics
#[derive(Debug, Clone)]
pub struct BackupCollector {
    /// Path to the backup status TOML file produced by the backup job
    /// (deserialized into `BackupStatusToml`).
    pub backup_status_file: String,
    /// Maximum allowed age (in hours) of the last successful backup;
    /// older backups are reported as `Status::Warning`.
    pub max_age_hours: u64,
}
|
|
|
|
impl BackupCollector {
|
|
pub fn new(backup_status_file: Option<String>, max_age_hours: u64) -> Self {
|
|
Self {
|
|
backup_status_file: backup_status_file
|
|
.unwrap_or_else(|| "/var/lib/backup/backup-status.toml".to_string()),
|
|
max_age_hours,
|
|
}
|
|
}
|
|
|
|
async fn read_backup_status(&self) -> Result<Option<BackupStatusToml>, CollectorError> {
|
|
// Check if backup status file exists
|
|
if !std::path::Path::new(&self.backup_status_file).exists() {
|
|
return Ok(None); // File doesn't exist, but this is not an error
|
|
}
|
|
|
|
let content = fs::read_to_string(&self.backup_status_file)
|
|
.await
|
|
.map_err(|e| CollectorError::SystemRead {
|
|
path: self.backup_status_file.clone(),
|
|
error: e.to_string(),
|
|
})?;
|
|
|
|
let backup_status = toml::from_str(&content).map_err(|e| CollectorError::Parse {
|
|
value: "backup status TOML".to_string(),
|
|
error: e.to_string(),
|
|
})?;
|
|
|
|
Ok(Some(backup_status))
|
|
}
|
|
|
|
fn calculate_backup_status(&self, backup_status: &BackupStatusToml) -> Status {
|
|
// Parse the start time to check age - handle both RFC3339 and local timestamp formats
|
|
let start_time = match chrono::DateTime::parse_from_rfc3339(&backup_status.start_time) {
|
|
Ok(dt) => dt.with_timezone(&Utc),
|
|
Err(_) => {
|
|
// Try parsing as naive datetime and assume UTC
|
|
match chrono::NaiveDateTime::parse_from_str(
|
|
&backup_status.start_time,
|
|
"%Y-%m-%dT%H:%M:%S%.f",
|
|
) {
|
|
Ok(naive_dt) => naive_dt.and_utc(),
|
|
Err(_) => {
|
|
error!(
|
|
"Failed to parse backup timestamp: {}",
|
|
backup_status.start_time
|
|
);
|
|
return Status::Unknown;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
let hours_since_backup = Utc::now().signed_duration_since(start_time).num_hours();
|
|
|
|
// Check overall backup status
|
|
match backup_status.status.as_str() {
|
|
"success" => {
|
|
if hours_since_backup > self.max_age_hours as i64 {
|
|
Status::Warning // Backup too old
|
|
} else {
|
|
Status::Ok
|
|
}
|
|
}
|
|
"failed" => Status::Critical,
|
|
"running" => Status::Ok, // Currently running is OK
|
|
_ => Status::Unknown,
|
|
}
|
|
}
|
|
|
|
fn calculate_service_status(&self, service: &ServiceStatus) -> Status {
|
|
match service.status.as_str() {
|
|
"completed" => {
|
|
if service.exit_code == 0 {
|
|
Status::Ok
|
|
} else {
|
|
Status::Critical
|
|
}
|
|
}
|
|
"failed" => Status::Critical,
|
|
"disabled" => Status::Warning, // Service intentionally disabled
|
|
"running" => Status::Ok,
|
|
_ => Status::Unknown,
|
|
}
|
|
}
|
|
|
|
fn bytes_to_gb(bytes: u64) -> f32 {
|
|
bytes as f32 / (1024.0 * 1024.0 * 1024.0)
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Collector for BackupCollector {
|
|
|
|
async fn collect(&self, _status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
|
|
let backup_status_option = self.read_backup_status().await?;
|
|
let mut metrics = Vec::new();
|
|
let timestamp = chrono::Utc::now().timestamp() as u64;
|
|
|
|
// If no backup status file exists, return minimal metrics indicating no backup system
|
|
let backup_status = match backup_status_option {
|
|
Some(status) => status,
|
|
None => {
|
|
// No backup system configured - return minimal "unknown" metrics
|
|
metrics.push(Metric {
|
|
name: "backup_overall_status".to_string(),
|
|
value: MetricValue::String("no_backup_system".to_string()),
|
|
status: Status::Unknown,
|
|
timestamp,
|
|
description: Some("No backup system configured (no status file found)".to_string()),
|
|
unit: None,
|
|
});
|
|
return Ok(metrics);
|
|
}
|
|
};
|
|
|
|
// Overall backup status
|
|
let overall_status = self.calculate_backup_status(&backup_status);
|
|
metrics.push(Metric {
|
|
name: "backup_overall_status".to_string(),
|
|
value: MetricValue::String(match overall_status {
|
|
Status::Ok => "ok".to_string(),
|
|
Status::Pending => "pending".to_string(),
|
|
Status::Warning => "warning".to_string(),
|
|
Status::Critical => "critical".to_string(),
|
|
Status::Unknown => "unknown".to_string(),
|
|
}),
|
|
status: overall_status,
|
|
timestamp,
|
|
description: Some(format!(
|
|
"Backup: {} at {}",
|
|
backup_status.status, backup_status.start_time
|
|
)),
|
|
unit: None,
|
|
});
|
|
|
|
// Backup duration
|
|
metrics.push(Metric {
|
|
name: "backup_duration_seconds".to_string(),
|
|
value: MetricValue::Integer(backup_status.duration_seconds),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Duration of last backup run".to_string()),
|
|
unit: Some("seconds".to_string()),
|
|
});
|
|
|
|
// Last backup timestamp - use last_updated (when backup finished) instead of start_time
|
|
let last_updated_dt_result =
|
|
chrono::DateTime::parse_from_rfc3339(&backup_status.last_updated)
|
|
.map(|dt| dt.with_timezone(&Utc))
|
|
.or_else(|_| {
|
|
// Try parsing as naive datetime and assume UTC
|
|
chrono::NaiveDateTime::parse_from_str(
|
|
&backup_status.last_updated,
|
|
"%Y-%m-%dT%H:%M:%S%.f",
|
|
)
|
|
.map(|naive_dt| naive_dt.and_utc())
|
|
});
|
|
|
|
if let Ok(last_updated_dt) = last_updated_dt_result {
|
|
metrics.push(Metric {
|
|
name: "backup_last_run_timestamp".to_string(),
|
|
value: MetricValue::Integer(last_updated_dt.timestamp()),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Timestamp of last backup completion".to_string()),
|
|
unit: Some("unix_timestamp".to_string()),
|
|
});
|
|
} else {
|
|
error!(
|
|
"Failed to parse backup timestamp for last_run_timestamp: {}",
|
|
backup_status.last_updated
|
|
);
|
|
}
|
|
|
|
// Individual service metrics
|
|
for (service_name, service) in &backup_status.services {
|
|
let service_status = self.calculate_service_status(service);
|
|
|
|
// Service status
|
|
metrics.push(Metric {
|
|
name: format!("backup_service_{}_status", service_name),
|
|
value: MetricValue::String(match service_status {
|
|
Status::Ok => "ok".to_string(),
|
|
Status::Pending => "pending".to_string(),
|
|
Status::Warning => "warning".to_string(),
|
|
Status::Critical => "critical".to_string(),
|
|
Status::Unknown => "unknown".to_string(),
|
|
}),
|
|
status: service_status,
|
|
timestamp,
|
|
description: Some(format!(
|
|
"Backup service {} status: {}",
|
|
service_name, service.status
|
|
)),
|
|
unit: None,
|
|
});
|
|
|
|
// Service exit code
|
|
metrics.push(Metric {
|
|
name: format!("backup_service_{}_exit_code", service_name),
|
|
value: MetricValue::Integer(service.exit_code),
|
|
status: if service.exit_code == 0 {
|
|
Status::Ok
|
|
} else {
|
|
Status::Critical
|
|
},
|
|
timestamp,
|
|
description: Some(format!("Exit code for backup service {}", service_name)),
|
|
unit: None,
|
|
});
|
|
|
|
// Repository archive count
|
|
metrics.push(Metric {
|
|
name: format!("backup_service_{}_archive_count", service_name),
|
|
value: MetricValue::Integer(service.archive_count),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some(format!("Number of archives in {} repository", service_name)),
|
|
unit: Some("archives".to_string()),
|
|
});
|
|
|
|
// Repository size in GB
|
|
let repo_size_gb = Self::bytes_to_gb(service.repo_size_bytes);
|
|
metrics.push(Metric {
|
|
name: format!("backup_service_{}_repo_size_gb", service_name),
|
|
value: MetricValue::Float(repo_size_gb),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some(format!("Repository size for {} in GB", service_name)),
|
|
unit: Some("GB".to_string()),
|
|
});
|
|
|
|
// Repository path for reference
|
|
metrics.push(Metric {
|
|
name: format!("backup_service_{}_repo_path", service_name),
|
|
value: MetricValue::String(service.repo_path.clone()),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some(format!("Repository path for {}", service_name)),
|
|
unit: None,
|
|
});
|
|
}
|
|
|
|
// Total number of services
|
|
metrics.push(Metric {
|
|
name: "backup_total_services".to_string(),
|
|
value: MetricValue::Integer(backup_status.services.len() as i64),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Total number of backup services".to_string()),
|
|
unit: Some("services".to_string()),
|
|
});
|
|
|
|
// Calculate total repository size
|
|
let total_size_bytes: u64 = backup_status
|
|
.services
|
|
.values()
|
|
.map(|s| s.repo_size_bytes)
|
|
.sum();
|
|
let total_size_gb = Self::bytes_to_gb(total_size_bytes);
|
|
metrics.push(Metric {
|
|
name: "backup_total_repo_size_gb".to_string(),
|
|
value: MetricValue::Float(total_size_gb),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Total size of all backup repositories".to_string()),
|
|
unit: Some("GB".to_string()),
|
|
});
|
|
|
|
// Disk space metrics for backup directory
|
|
if let Some(ref disk_space) = backup_status.disk_space {
|
|
metrics.push(Metric {
|
|
name: "backup_disk_total_gb".to_string(),
|
|
value: MetricValue::Float(disk_space.total_gb as f32),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Total disk space available for backups".to_string()),
|
|
unit: Some("GB".to_string()),
|
|
});
|
|
|
|
metrics.push(Metric {
|
|
name: "backup_disk_used_gb".to_string(),
|
|
value: MetricValue::Float(disk_space.used_gb as f32),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Used disk space on backup drive".to_string()),
|
|
unit: Some("GB".to_string()),
|
|
});
|
|
|
|
metrics.push(Metric {
|
|
name: "backup_disk_available_gb".to_string(),
|
|
value: MetricValue::Float(disk_space.available_gb as f32),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Available disk space on backup drive".to_string()),
|
|
unit: Some("GB".to_string()),
|
|
});
|
|
|
|
metrics.push(Metric {
|
|
name: "backup_disk_usage_percent".to_string(),
|
|
value: MetricValue::Float(disk_space.usage_percent as f32),
|
|
status: if disk_space.usage_percent >= 95.0 {
|
|
Status::Critical
|
|
} else if disk_space.usage_percent >= 85.0 {
|
|
Status::Warning
|
|
} else {
|
|
Status::Ok
|
|
},
|
|
timestamp,
|
|
description: Some("Backup disk usage percentage".to_string()),
|
|
unit: Some("percent".to_string()),
|
|
});
|
|
|
|
// Add disk identification metrics if available from disk_space
|
|
if let Some(ref product_name) = disk_space.product_name {
|
|
metrics.push(Metric {
|
|
name: "backup_disk_product_name".to_string(),
|
|
value: MetricValue::String(product_name.clone()),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Backup disk product name from SMART data".to_string()),
|
|
unit: None,
|
|
});
|
|
}
|
|
|
|
if let Some(ref serial_number) = disk_space.serial_number {
|
|
metrics.push(Metric {
|
|
name: "backup_disk_serial_number".to_string(),
|
|
value: MetricValue::String(serial_number.clone()),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Backup disk serial number from SMART data".to_string()),
|
|
unit: None,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Add standalone disk identification metrics from TOML fields
|
|
if let Some(ref product_name) = backup_status.disk_product_name {
|
|
metrics.push(Metric {
|
|
name: "backup_disk_product_name".to_string(),
|
|
value: MetricValue::String(product_name.clone()),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Backup disk product name from SMART data".to_string()),
|
|
unit: None,
|
|
});
|
|
}
|
|
|
|
if let Some(ref serial_number) = backup_status.disk_serial_number {
|
|
metrics.push(Metric {
|
|
name: "backup_disk_serial_number".to_string(),
|
|
value: MetricValue::String(serial_number.clone()),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some("Backup disk serial number from SMART data".to_string()),
|
|
unit: None,
|
|
});
|
|
}
|
|
|
|
// Count services by status
|
|
let mut status_counts = HashMap::new();
|
|
for service in backup_status.services.values() {
|
|
*status_counts.entry(service.status.clone()).or_insert(0) += 1;
|
|
}
|
|
|
|
for (status_name, count) in status_counts {
|
|
metrics.push(Metric {
|
|
name: format!("backup_services_{}_count", status_name),
|
|
value: MetricValue::Integer(count),
|
|
status: Status::Ok,
|
|
timestamp,
|
|
description: Some(format!("Number of services with status: {}", status_name)),
|
|
unit: Some("services".to_string()),
|
|
});
|
|
}
|
|
|
|
Ok(metrics)
|
|
}
|
|
}
|
|
|
|
/// TOML structure for backup status file
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct BackupStatusToml {
    // Name of this backup configuration.
    pub backup_name: String,
    // Start time of the last run; RFC3339 or naive "%Y-%m-%dT%H:%M:%S%.f"
    // (parsed as UTC by the collector).
    pub start_time: String,
    // Timestamp written when the file was produced — not read by the
    // collector, kept for TOML compatibility.
    pub current_time: String,
    // Duration of the last backup run, in seconds.
    pub duration_seconds: i64,
    // Overall run status; collector recognizes "success", "failed", "running".
    pub status: String,
    // Completion time of the last run, same accepted formats as start_time.
    pub last_updated: String,
    // Optional disk-space block for the backup target drive.
    pub disk_space: Option<DiskSpace>,
    // Optional standalone disk identification (also available inside
    // disk_space; both may be present in older/newer file versions).
    pub disk_product_name: Option<String>,
    pub disk_serial_number: Option<String>,
    // Per-service backup results keyed by service name.
    pub services: HashMap<String, ServiceStatus>,
}
|
|
|
|
/// Disk-space snapshot for the backup target drive.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DiskSpace {
    // Raw byte counts — not read by the collector, which uses the
    // pre-computed *_gb fields below.
    pub total_bytes: u64,
    pub used_bytes: u64,
    pub available_bytes: u64,
    // Pre-computed sizes in GB (producer's convention; presumably matches
    // the collector's GiB reporting — TODO confirm against the writer).
    pub total_gb: f64,
    pub used_gb: f64,
    pub available_gb: f64,
    // Usage percentage; thresholds of 85%/95% drive Warning/Critical status.
    pub usage_percent: f64,
    // Optional disk identification fields
    pub product_name: Option<String>,
    pub serial_number: Option<String>,
}
|
|
|
|
/// Per-service backup result as recorded in the status TOML.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ServiceStatus {
    // Service run status; collector recognizes "completed", "failed",
    // "disabled", "running".
    pub status: String,
    // Process exit code of the service's backup run; non-zero is Critical.
    pub exit_code: i64,
    // Filesystem path of the borg repository for this service.
    pub repo_path: String,
    // Number of archives currently in the repository.
    pub archive_count: i64,
    // Total repository size in bytes.
    pub repo_size_bytes: u64,
}
|