Implement comprehensive backup monitoring and fix timestamp issues
- Add BackupCollector for reading TOML status files with disk space metrics
- Implement BackupWidget with disk usage display and service status details
- Fix backup script disk space parsing by adding missing capture_output=True
- Update backup widget to show actual disk usage instead of repository size
- Fix timestamp parsing to use backup completion time instead of start time
- Resolve timezone issues by using UTC timestamps in backup script
- Add disk identification metrics (product name, serial number) to backup status
- Enhance UI layout with proper backup monitoring integration
This commit is contained in:
@@ -1,12 +1,26 @@
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use cm_dashboard_shared::{Metric, MetricValue, Status};
|
||||
use std::collections::HashMap;
|
||||
use std::process::Command;
|
||||
use std::time::Instant;
|
||||
use tracing::debug;
|
||||
|
||||
use super::{Collector, CollectorError, PerformanceMetrics};
|
||||
|
||||
/// One mounted filesystem as reported by a single data row of `df`.
///
/// The size-related fields hold the human-readable strings exactly as `df -h`
/// printed them; they are converted to numbers later when metrics are built.
#[derive(Debug, Clone)]
struct MountedDisk {
    /// Device node backing the mount, e.g. "/dev/nvme0n1p1".
    device: String,
    /// Whole-disk device the partition lives on, e.g. "/dev/nvme0n1".
    physical_device: String,
    /// Directory the filesystem is mounted on, e.g. "/".
    mount_point: String,
    /// Filesystem type, e.g. "ext4".
    filesystem: String,
    /// Total size, e.g. "120G".
    size: String,
    /// Space in use, e.g. "45G".
    used: String,
    /// Space still available, e.g. "75G".
    available: String,
    /// Percentage of the filesystem in use, e.g. 38.5.
    usage_percent: f32,
}
|
||||
|
||||
/// Disk usage collector for monitoring filesystem sizes
|
||||
pub struct DiskCollector {
|
||||
// Immutable collector for caching compatibility
|
||||
@@ -71,6 +85,142 @@ impl DiskCollector {
|
||||
Ok((total_bytes, used_bytes))
|
||||
}
|
||||
|
||||
/// Get root filesystem disk usage
|
||||
fn get_root_filesystem_usage(&self) -> Result<(u64, u64, f32)> {
|
||||
let (total_bytes, used_bytes) = self.get_filesystem_info("/")?;
|
||||
let usage_percent = (used_bytes as f64 / total_bytes as f64) * 100.0;
|
||||
Ok((total_bytes, used_bytes, usage_percent as f32))
|
||||
}
|
||||
|
||||
/// Get all mounted disks with their mount points and underlying devices
|
||||
fn get_mounted_disks(&self) -> Result<Vec<MountedDisk>> {
|
||||
let output = Command::new("df")
|
||||
.arg("-h")
|
||||
.arg("--output=source,target,fstype,size,used,avail,pcent")
|
||||
.output()?;
|
||||
|
||||
if !output.status.success() {
|
||||
return Err(anyhow::anyhow!("df command failed"));
|
||||
}
|
||||
|
||||
let output_str = String::from_utf8(output.stdout)?;
|
||||
let mut mounted_disks = Vec::new();
|
||||
|
||||
for line in output_str.lines().skip(1) { // Skip header
|
||||
let fields: Vec<&str> = line.split_whitespace().collect();
|
||||
if fields.len() >= 7 {
|
||||
let source = fields[0];
|
||||
let target = fields[1];
|
||||
let fstype = fields[2];
|
||||
let size = fields[3];
|
||||
let used = fields[4];
|
||||
let avail = fields[5];
|
||||
let pcent_str = fields[6];
|
||||
|
||||
// Skip special filesystems
|
||||
if source.starts_with("/dev/") &&
|
||||
!fstype.contains("tmpfs") &&
|
||||
!fstype.contains("devtmpfs") &&
|
||||
!target.starts_with("/proc") &&
|
||||
!target.starts_with("/sys") &&
|
||||
!target.starts_with("/dev") {
|
||||
|
||||
// Extract percentage
|
||||
let usage_percent = pcent_str
|
||||
.trim_end_matches('%')
|
||||
.parse::<f32>()
|
||||
.unwrap_or(0.0);
|
||||
|
||||
// Get underlying physical device
|
||||
let physical_device = self.get_physical_device(source)?;
|
||||
|
||||
mounted_disks.push(MountedDisk {
|
||||
device: source.to_string(),
|
||||
physical_device,
|
||||
mount_point: target.to_string(),
|
||||
filesystem: fstype.to_string(),
|
||||
size: size.to_string(),
|
||||
used: used.to_string(),
|
||||
available: avail.to_string(),
|
||||
usage_percent,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(mounted_disks)
|
||||
}
|
||||
|
||||
/// Get the physical device for a given device (resolves symlinks, gets parent device)
|
||||
fn get_physical_device(&self, device: &str) -> Result<String> {
|
||||
// For NVMe: /dev/nvme0n1p1 -> /dev/nvme0n1
|
||||
if device.contains("nvme") && device.contains("p") {
|
||||
if let Some(base) = device.split('p').next() {
|
||||
return Ok(base.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// For SATA: /dev/sda1 -> /dev/sda
|
||||
if device.starts_with("/dev/sd") && device.len() > 8 {
|
||||
return Ok(device[..8].to_string()); // Keep /dev/sdX
|
||||
}
|
||||
|
||||
// For VirtIO: /dev/vda1 -> /dev/vda
|
||||
if device.starts_with("/dev/vd") && device.len() > 8 {
|
||||
return Ok(device[..8].to_string());
|
||||
}
|
||||
|
||||
// If no partition detected, return as-is
|
||||
Ok(device.to_string())
|
||||
}
|
||||
|
||||
/// Get SMART health for a specific physical device
|
||||
fn get_smart_health(&self, device: &str) -> (String, f32) {
|
||||
if let Ok(output) = Command::new("smartctl")
|
||||
.arg("-H")
|
||||
.arg(device)
|
||||
.output()
|
||||
{
|
||||
if output.status.success() {
|
||||
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||
let health_status = if output_str.contains("PASSED") {
|
||||
"PASSED"
|
||||
} else if output_str.contains("FAILED") {
|
||||
"FAILED"
|
||||
} else {
|
||||
"UNKNOWN"
|
||||
};
|
||||
|
||||
// Try to get temperature
|
||||
let temperature = if let Ok(temp_output) = Command::new("smartctl")
|
||||
.arg("-A")
|
||||
.arg(device)
|
||||
.output()
|
||||
{
|
||||
let temp_str = String::from_utf8_lossy(&temp_output.stdout);
|
||||
// Look for temperature in SMART attributes
|
||||
for line in temp_str.lines() {
|
||||
if line.contains("Temperature") && line.contains("Celsius") {
|
||||
if let Some(temp_part) = line.split_whitespace().nth(9) {
|
||||
if let Ok(temp) = temp_part.parse::<f32>() {
|
||||
return (health_status.to_string(), temp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
0.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
return (health_status.to_string(), temperature);
|
||||
}
|
||||
}
|
||||
|
||||
("UNKNOWN".to_string(), 0.0)
|
||||
}
|
||||
|
||||
|
||||
/// Calculate status based on usage percentage
|
||||
fn calculate_usage_status(&self, used_bytes: u64, total_bytes: u64) -> Status {
|
||||
if total_bytes == 0 {
|
||||
@@ -88,6 +238,38 @@ impl DiskCollector {
|
||||
Status::Ok
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse size string (e.g., "120G", "45M") to GB value
|
||||
fn parse_size_to_gb(&self, size_str: &str) -> f32 {
|
||||
let size_str = size_str.trim();
|
||||
if size_str.is_empty() || size_str == "-" {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Extract numeric part and unit
|
||||
let (num_str, unit) = if let Some(last_char) = size_str.chars().last() {
|
||||
if last_char.is_alphabetic() {
|
||||
let num_part = &size_str[..size_str.len()-1];
|
||||
let unit_part = &size_str[size_str.len()-1..];
|
||||
(num_part, unit_part)
|
||||
} else {
|
||||
(size_str, "")
|
||||
}
|
||||
} else {
|
||||
(size_str, "")
|
||||
};
|
||||
|
||||
let number: f32 = num_str.parse().unwrap_or(0.0);
|
||||
|
||||
match unit.to_uppercase().as_str() {
|
||||
"T" | "TB" => number * 1024.0,
|
||||
"G" | "GB" => number,
|
||||
"M" | "MB" => number / 1024.0,
|
||||
"K" | "KB" => number / (1024.0 * 1024.0),
|
||||
"B" | "" => number / (1024.0 * 1024.0 * 1024.0),
|
||||
_ => number, // Assume GB if unknown unit
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -98,11 +280,186 @@ impl Collector for DiskCollector {
|
||||
|
||||
async fn collect(&self) -> Result<Vec<Metric>, CollectorError> {
|
||||
let start_time = Instant::now();
|
||||
debug!("Collecting disk metrics");
|
||||
debug!("Collecting multi-disk metrics");
|
||||
|
||||
let mut metrics = Vec::new();
|
||||
|
||||
// Monitor /tmp directory size
|
||||
// Collect all mounted disks
|
||||
match self.get_mounted_disks() {
|
||||
Ok(mounted_disks) => {
|
||||
debug!("Found {} mounted disks", mounted_disks.len());
|
||||
|
||||
// Group disks by physical device to avoid duplicate SMART checks
|
||||
let mut physical_devices: std::collections::HashMap<String, Vec<&MountedDisk>> = std::collections::HashMap::new();
|
||||
for disk in &mounted_disks {
|
||||
physical_devices.entry(disk.physical_device.clone())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(disk);
|
||||
}
|
||||
|
||||
// Generate metrics for each mounted disk
|
||||
for (disk_index, disk) in mounted_disks.iter().enumerate() {
|
||||
let timestamp = chrono::Utc::now().timestamp() as u64;
|
||||
|
||||
// Parse size strings to get actual values for calculations
|
||||
let size_gb = self.parse_size_to_gb(&disk.size);
|
||||
let used_gb = self.parse_size_to_gb(&disk.used);
|
||||
let avail_gb = self.parse_size_to_gb(&disk.available);
|
||||
|
||||
// Calculate status based on usage percentage
|
||||
let status = if disk.usage_percent >= 95.0 {
|
||||
Status::Critical
|
||||
} else if disk.usage_percent >= 85.0 {
|
||||
Status::Warning
|
||||
} else {
|
||||
Status::Ok
|
||||
};
|
||||
|
||||
// Device and mount point info
|
||||
metrics.push(Metric {
|
||||
name: format!("disk_{}_device", disk_index),
|
||||
value: MetricValue::String(disk.device.clone()),
|
||||
unit: None,
|
||||
description: Some(format!("Device: {}", disk.device)),
|
||||
status: Status::Ok,
|
||||
timestamp,
|
||||
});
|
||||
|
||||
metrics.push(Metric {
|
||||
name: format!("disk_{}_mount_point", disk_index),
|
||||
value: MetricValue::String(disk.mount_point.clone()),
|
||||
unit: None,
|
||||
description: Some(format!("Mount: {}", disk.mount_point)),
|
||||
status: Status::Ok,
|
||||
timestamp,
|
||||
});
|
||||
|
||||
metrics.push(Metric {
|
||||
name: format!("disk_{}_filesystem", disk_index),
|
||||
value: MetricValue::String(disk.filesystem.clone()),
|
||||
unit: None,
|
||||
description: Some(format!("FS: {}", disk.filesystem)),
|
||||
status: Status::Ok,
|
||||
timestamp,
|
||||
});
|
||||
|
||||
// Size metrics
|
||||
metrics.push(Metric {
|
||||
name: format!("disk_{}_total_gb", disk_index),
|
||||
value: MetricValue::Float(size_gb),
|
||||
unit: Some("GB".to_string()),
|
||||
description: Some(format!("Total: {}", disk.size)),
|
||||
status: Status::Ok,
|
||||
timestamp,
|
||||
});
|
||||
|
||||
metrics.push(Metric {
|
||||
name: format!("disk_{}_used_gb", disk_index),
|
||||
value: MetricValue::Float(used_gb),
|
||||
unit: Some("GB".to_string()),
|
||||
description: Some(format!("Used: {}", disk.used)),
|
||||
status,
|
||||
timestamp,
|
||||
});
|
||||
|
||||
metrics.push(Metric {
|
||||
name: format!("disk_{}_available_gb", disk_index),
|
||||
value: MetricValue::Float(avail_gb),
|
||||
unit: Some("GB".to_string()),
|
||||
description: Some(format!("Available: {}", disk.available)),
|
||||
status: Status::Ok,
|
||||
timestamp,
|
||||
});
|
||||
|
||||
metrics.push(Metric {
|
||||
name: format!("disk_{}_usage_percent", disk_index),
|
||||
value: MetricValue::Float(disk.usage_percent),
|
||||
unit: Some("%".to_string()),
|
||||
description: Some(format!("Usage: {:.1}%", disk.usage_percent)),
|
||||
status,
|
||||
timestamp,
|
||||
});
|
||||
|
||||
// Physical device name (for SMART health grouping)
|
||||
let physical_device_name = disk.physical_device
|
||||
.strip_prefix("/dev/")
|
||||
.unwrap_or(&disk.physical_device);
|
||||
|
||||
metrics.push(Metric {
|
||||
name: format!("disk_{}_physical_device", disk_index),
|
||||
value: MetricValue::String(physical_device_name.to_string()),
|
||||
unit: None,
|
||||
description: Some(format!("Physical: {}", physical_device_name)),
|
||||
status: Status::Ok,
|
||||
timestamp,
|
||||
});
|
||||
}
|
||||
|
||||
// Add SMART health metrics for each unique physical device
|
||||
for (physical_device, disks) in physical_devices {
|
||||
let (health_status, temperature) = self.get_smart_health(&physical_device);
|
||||
let device_name = physical_device.strip_prefix("/dev/").unwrap_or(&physical_device);
|
||||
let timestamp = chrono::Utc::now().timestamp() as u64;
|
||||
|
||||
let health_status_enum = match health_status.as_str() {
|
||||
"PASSED" => Status::Ok,
|
||||
"FAILED" => Status::Critical,
|
||||
_ => Status::Unknown,
|
||||
};
|
||||
|
||||
metrics.push(Metric {
|
||||
name: format!("disk_smart_{}_health", device_name),
|
||||
value: MetricValue::String(health_status.clone()),
|
||||
unit: None,
|
||||
description: Some(format!("SMART Health: {}", health_status)),
|
||||
status: health_status_enum,
|
||||
timestamp,
|
||||
});
|
||||
|
||||
if temperature > 0.0 {
|
||||
let temp_status = if temperature >= 70.0 {
|
||||
Status::Critical
|
||||
} else if temperature >= 60.0 {
|
||||
Status::Warning
|
||||
} else {
|
||||
Status::Ok
|
||||
};
|
||||
|
||||
metrics.push(Metric {
|
||||
name: format!("disk_smart_{}_temperature", device_name),
|
||||
value: MetricValue::Float(temperature),
|
||||
unit: Some("°C".to_string()),
|
||||
description: Some(format!("Temperature: {:.0}°C", temperature)),
|
||||
status: temp_status,
|
||||
timestamp,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Add disk count metric
|
||||
metrics.push(Metric {
|
||||
name: "disk_count".to_string(),
|
||||
value: MetricValue::Integer(mounted_disks.len() as i64),
|
||||
unit: None,
|
||||
description: Some(format!("Total mounted disks: {}", mounted_disks.len())),
|
||||
status: Status::Ok,
|
||||
timestamp: chrono::Utc::now().timestamp() as u64,
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("Failed to get mounted disks: {}", e);
|
||||
metrics.push(Metric {
|
||||
name: "disk_count".to_string(),
|
||||
value: MetricValue::Integer(0),
|
||||
unit: None,
|
||||
description: Some(format!("Error: {}", e)),
|
||||
status: Status::Unknown,
|
||||
timestamp: chrono::Utc::now().timestamp() as u64,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Monitor /tmp directory size (keep existing functionality)
|
||||
match self.get_directory_size("/tmp") {
|
||||
Ok(tmp_size_bytes) => {
|
||||
let tmp_size_mb = tmp_size_bytes as f64 / (1024.0 * 1024.0);
|
||||
@@ -161,7 +518,7 @@ impl Collector for DiskCollector {
|
||||
}
|
||||
|
||||
let collection_time = start_time.elapsed();
|
||||
debug!("Disk collection completed in {:?} with {} metrics",
|
||||
debug!("Multi-disk collection completed in {:?} with {} metrics",
|
||||
collection_time, metrics.len());
|
||||
|
||||
Ok(metrics)
|
||||
|
||||
Reference in New Issue
Block a user