Implement comprehensive backup monitoring and fix timestamp issues

- Add BackupCollector for reading TOML status files with disk space metrics
- Implement BackupWidget with disk usage display and service status details
- Fix backup script disk space parsing by adding missing capture_output=True
- Update backup widget to show actual disk usage instead of repository size
- Fix timestamp parsing to use backup completion time instead of start time
- Resolve timezone issues by using UTC timestamps in backup script
- Add disk identification metrics (product name, serial number) to backup status
- Enhance UI layout with proper backup monitoring integration
This commit is contained in:
2025-10-18 18:33:41 +02:00
parent 8a36472a3d
commit 125111ee99
19 changed files with 2788 additions and 1020 deletions

View File

@@ -25,6 +25,12 @@ struct ServiceCacheState {
last_discovery_time: Option<Instant>,
/// How often to rediscover services (5 minutes)
discovery_interval_seconds: u64,
/// Cached nginx site latency metrics
nginx_site_metrics: Vec<Metric>,
/// Last time nginx sites were checked
last_nginx_check_time: Option<Instant>,
/// How often to check nginx site latency (30 seconds)
nginx_check_interval_seconds: u64,
}
impl SystemdCollector {
@@ -35,6 +41,9 @@ impl SystemdCollector {
monitored_services: Vec::new(),
last_discovery_time: None,
discovery_interval_seconds: 300, // 5 minutes
nginx_site_metrics: Vec::new(),
last_nginx_check_time: None,
nginx_check_interval_seconds: 30, // 30 seconds for nginx sites
}),
}
}
@@ -71,6 +80,32 @@ impl SystemdCollector {
Ok(state.monitored_services.clone())
}
/// Return the nginx site latency metrics, refreshing the cache first when it has expired.
///
/// A refresh is attempted when no check has been recorded yet, or when at least
/// `nginx_check_interval_seconds` have elapsed since the last one, and only if
/// some monitored service name contains "nginx". In every other case the cached
/// metrics are returned unchanged.
///
/// The state lock is released while `get_nginx_sites` runs, so slow site probes
/// do not block other users of the shared state. As a consequence two concurrent
/// callers may both refresh; the later store simply overwrites the earlier one.
///
/// # Panics
/// Panics if the state lock is poisoned.
fn get_nginx_site_metrics(&self) -> Vec<Metric> {
    // Decide whether a refresh is due under a short-lived read lock.
    let interval_seconds = {
        let state = self.state.read().unwrap();

        let cache_expired = state
            .last_nginx_check_time
            .map_or(true, |last_time| {
                last_time.elapsed().as_secs() >= state.nginx_check_interval_seconds
            });
        // Only check nginx sites if an nginx service is being monitored.
        let nginx_monitored = state
            .monitored_services
            .iter()
            .any(|s| s.contains("nginx"));

        if !(cache_expired && nginx_monitored) {
            return state.nginx_site_metrics.clone();
        }
        state.nginx_check_interval_seconds
    };

    debug!("Refreshing nginx site latency metrics (interval: {}s)", interval_seconds);

    // Probe the sites without holding any lock.
    let fresh_metrics = self.get_nginx_sites();

    // Re-acquire the lock only for the time it takes to store the result.
    let mut state = self.state.write().unwrap();
    state.nginx_site_metrics = fresh_metrics.clone();
    state.last_nginx_check_time = Some(Instant::now());
    fresh_metrics
}
/// Auto-discover interesting services to monitor
fn discover_services(&self) -> Result<Vec<String>> {
let output = Command::new("systemctl")
@@ -88,22 +123,86 @@ impl SystemdCollector {
let output_str = String::from_utf8(output.stdout)?;
let mut services = Vec::new();
// Interesting service patterns to monitor
let interesting_patterns = [
"nginx", "apache", "httpd", "gitea", "docker", "mysql", "postgresql",
"redis", "ssh", "sshd", "postfix", "mosquitto", "grafana", "prometheus",
"vaultwarden", "unifi", "immich", "plex", "jellyfin", "transmission",
"syncthing", "nextcloud", "owncloud", "mariadb", "mongodb"
// Skip setup/certificate services that don't need monitoring (from legacy)
let excluded_services = [
"mosquitto-certs",
"immich-setup",
"phpfpm-kryddorten",
"phpfpm-mariehall2",
"acme-haasp.net",
"acme-selfsigned-haasp",
"borgbackup",
"haasp-site-deploy",
"mosquitto-backup",
"nginx-config-reload",
"sshd-keygen",
];
// Define patterns for services we want to monitor (from legacy)
let interesting_services = [
// Web applications
"gitea",
"immich",
"vaultwarden",
"unifi",
"wordpress",
"nginx",
"httpd",
// Databases
"postgresql",
"mysql",
"mariadb",
"redis",
"mongodb",
"mongod",
// Backup and storage
"borg",
"rclone",
// Container runtimes
"docker",
// CI/CD services
"gitea-actions",
"gitea-runner",
"actions-runner",
// Network services
"sshd",
"dnsmasq",
// MQTT and IoT services
"mosquitto",
"mqtt",
// PHP-FPM services
"phpfpm",
// Home automation
"haasp",
// Backup services
"backup",
];
for line in output_str.lines() {
let fields: Vec<&str> = line.split_whitespace().collect();
if fields.len() >= 4 && fields[0].ends_with(".service") {
let service_name = fields[0].trim_end_matches(".service");
debug!("Processing service: '{}'", service_name);
// Skip excluded services first
let mut is_excluded = false;
for excluded in &excluded_services {
if service_name.contains(excluded) {
debug!("EXCLUDING service '{}' because it matches pattern '{}'", service_name, excluded);
is_excluded = true;
break;
}
}
if is_excluded {
debug!("Skipping excluded service: '{}'", service_name);
continue;
}
// Check if this service matches our interesting patterns
for pattern in &interesting_patterns {
if service_name.contains(pattern) {
for pattern in &interesting_services {
if service_name.contains(pattern) || pattern.contains(service_name) {
debug!("INCLUDING service '{}' because it matches pattern '{}'", service_name, pattern);
services.push(service_name.to_string());
break;
}
@@ -571,140 +670,7 @@ impl SystemdCollector {
Some(estimated_gb)
}
/// Get nginx virtual hosts/sites
fn get_nginx_sites(&self) -> Vec<Metric> {
let mut metrics = Vec::new();
// Check sites-enabled directory
let output = Command::new("ls")
.arg("/etc/nginx/sites-enabled/")
.output();
if let Ok(output) = output {
if output.status.success() {
let output_str = String::from_utf8_lossy(&output.stdout);
for line in output_str.lines() {
let site_name = line.trim();
if !site_name.is_empty() && site_name != "default" {
// Check if site config is valid
let test_output = Command::new("nginx")
.arg("-t")
.arg("-c")
.arg(format!("/etc/nginx/sites-enabled/{}", site_name))
.output();
let status = match test_output {
Ok(out) if out.status.success() => Status::Ok,
_ => Status::Warning,
};
metrics.push(Metric {
name: format!("service_nginx_site_{}_status", site_name),
value: MetricValue::String(if status == Status::Ok { "active".to_string() } else { "error".to_string() }),
unit: None,
description: Some(format!("Nginx site {} configuration status", site_name)),
status,
timestamp: chrono::Utc::now().timestamp() as u64,
});
}
}
}
}
metrics
}
/// Report a status metric for every docker container, plus memory usage for running ones.
///
/// Runs `docker ps -a` with a tab-separated name/status/state format. Lines
/// with fewer than three tab-separated fields are ignored. The state (compared
/// case-insensitively) maps to `Status::Ok` for "running", `Status::Warning`
/// for "exited", "dead", "paused" and "restarting", and `Status::Critical`
/// for anything else. When the state is exactly "running" and
/// `get_container_memory` returns a value, a second metric with that value is added.
///
/// Returns an empty vector when `docker` cannot be run or exits unsuccessfully.
fn get_docker_containers(&self) -> Vec<Metric> {
    let listing = match Command::new("docker")
        .args(["ps", "-a", "--format", "{{.Names}}\t{{.Status}}\t{{.State}}"])
        .output()
    {
        Ok(out) if out.status.success() => out,
        _ => return Vec::new(),
    };
    let listing_text = String::from_utf8_lossy(&listing.stdout);

    let mut container_metrics = Vec::new();
    for row in listing_text.lines() {
        let mut columns = row.split('\t');
        let (container_name, status_info, state) =
            match (columns.next(), columns.next(), columns.next()) {
                (Some(name), Some(info), Some(state)) => (name.trim(), info.trim(), state.trim()),
                _ => continue,
            };

        let status = match state.to_lowercase().as_str() {
            "running" => Status::Ok,
            "exited" | "dead" | "paused" | "restarting" => Status::Warning,
            _ => Status::Critical,
        };

        container_metrics.push(Metric {
            name: format!("service_docker_container_{}_status", container_name),
            value: MetricValue::String(state.to_string()),
            unit: None,
            description: Some(format!(
                "Docker container {} status: {}",
                container_name, status_info
            )),
            status,
            timestamp: chrono::Utc::now().timestamp() as u64,
        });

        // Get container memory usage (only for containers reported as running)
        if state != "running" {
            continue;
        }
        if let Some(memory_mb) = self.get_container_memory(container_name) {
            container_metrics.push(Metric {
                name: format!("service_docker_container_{}_memory_mb", container_name),
                value: MetricValue::Float(memory_mb),
                unit: Some("MB".to_string()),
                description: Some(format!(
                    "Docker container {} memory usage",
                    container_name
                )),
                status: Status::Ok,
                timestamp: chrono::Utc::now().timestamp() as u64,
            });
        }
    }

    container_metrics
}
/// Get a container's current memory usage, expressed in MiB.
///
/// Runs `docker stats --no-stream --format {{.MemUsage}} <container_name>` and
/// parses the "used" half of output shaped like `123.4MiB / 4GiB`. The binary
/// suffixes `B`, `KiB`, `MiB`, `GiB` and `TiB` are recognised and converted to MiB.
///
/// Returns `None` when the command cannot be run, exits unsuccessfully, emits
/// non-UTF-8 output, or the used amount cannot be parsed.
fn get_container_memory(&self, container_name: &str) -> Option<f32> {
    // Conversion factors to MiB. Multi-character suffixes come first so that
    // "MiB" is never mistaken for a plain "B".
    const UNITS_TO_MIB: [(&str, f32); 5] = [
        ("KiB", 1.0 / 1024.0),
        ("MiB", 1.0),
        ("GiB", 1024.0),
        ("TiB", 1024.0 * 1024.0),
        ("B", 1.0 / (1024.0 * 1024.0)),
    ];

    let output = Command::new("docker")
        .arg("stats")
        .arg("--no-stream")
        .arg("--format")
        .arg("{{.MemUsage}}")
        .arg(container_name)
        .output()
        .ok()?;

    if !output.status.success() {
        return None;
    }

    let output_str = String::from_utf8(output.stdout).ok()?;

    // Parse format like "123.4MiB / 4GiB": keep only the part before the slash.
    let used_part = output_str.trim().split(" / ").next()?.trim();

    UNITS_TO_MIB.iter().find_map(|(suffix, factor)| {
        used_part
            .strip_suffix(suffix)
            .and_then(|number| number.trim().parse::<f32>().ok())
            .map(|value| value * factor)
    })
}
}
#[async_trait]
@@ -770,13 +736,11 @@ impl Collector for SystemdCollector {
// Sub-service metrics for specific services
if service.contains("nginx") && active_status == "active" {
let nginx_sites = self.get_nginx_sites();
metrics.extend(nginx_sites);
metrics.extend(self.get_nginx_site_metrics());
}
if service.contains("docker") && active_status == "active" {
let docker_containers = self.get_docker_containers();
metrics.extend(docker_containers);
metrics.extend(self.get_docker_containers());
}
}
Err(e) => {
@@ -795,4 +759,321 @@ impl Collector for SystemdCollector {
/// Performance metrics for this collector.
///
/// Always returns `None`; this collector does not report any itself.
fn get_performance_metrics(&self) -> Option<PerformanceMetrics> {
None // Performance tracking handled by cache system
}
}
impl SystemdCollector {
/// Measure response latency for every discovered nginx site.
///
/// Sites come from `discover_nginx_sites`; each one is probed with
/// `check_site_latency`. A successful probe produces a metric whose value is the
/// latency in milliseconds, with `Status::Ok` below 500 ms, `Status::Warning`
/// below 2000 ms and `Status::Critical` otherwise. A failed probe produces a
/// metric with value `-1.0` and `Status::Critical`. All metrics share one
/// timestamp taken before discovery starts.
fn get_nginx_sites(&self) -> Vec<Metric> {
    let timestamp = chrono::Utc::now().timestamp() as u64;

    // Discover nginx sites from configuration
    self.discover_nginx_sites()
        .iter()
        .map(|(site_name, url)| {
            let (value, description, status) = match self.check_site_latency(url) {
                Ok(ms) if ms < 500.0 => (ms, format!("Response time for {}", url), Status::Ok),
                Ok(ms) if ms < 2000.0 => {
                    (ms, format!("Response time for {}", url), Status::Warning)
                }
                Ok(ms) => (ms, format!("Response time for {}", url), Status::Critical),
                // Site is unreachable: -1 marks the error
                Err(_) => (
                    -1.0,
                    format!("Response time for {} (unreachable)", url),
                    Status::Critical,
                ),
            };

            Metric {
                name: format!("service_nginx_{}_latency_ms", site_name),
                value: MetricValue::Float(value),
                unit: Some("ms".to_string()),
                description: Some(description),
                status,
                timestamp,
            }
        })
        .collect()
}
/// Report one status metric per container listed by `docker ps`.
///
/// The listing uses a comma-separated name/status format. Blank lines and lines
/// with fewer than two comma-separated fields are skipped. A status text
/// containing "Up" maps to `Status::Ok`, one containing "Exited" to
/// `Status::Warning`, and anything else to `Status::Critical`. All metrics share
/// one timestamp taken before the command runs.
///
/// Returns an empty vector when `docker` cannot be run, exits unsuccessfully,
/// or produces output that is not valid UTF-8.
fn get_docker_containers(&self) -> Vec<Metric> {
    let timestamp = chrono::Utc::now().timestamp() as u64;

    // Docker not available, failed, or emitted non-UTF-8 output => nothing to report
    let listing = Command::new("docker")
        .args(["ps", "--format", "{{.Names}},{{.Status}}"])
        .output()
        .ok()
        .filter(|out| out.status.success())
        .and_then(|out| String::from_utf8(out.stdout).ok());
    let listing = match listing {
        Some(text) => text,
        None => return Vec::new(),
    };

    listing
        .lines()
        .filter(|row| !row.trim().is_empty())
        .filter_map(|row| {
            let mut columns = row.split(',');
            let container_name = columns.next()?.trim();
            let status_str = columns.next()?.trim();

            let status = if status_str.contains("Up") {
                Status::Ok
            } else if status_str.contains("Exited") {
                Status::Warning
            } else {
                Status::Critical
            };

            Some(Metric {
                name: format!("service_docker_{}_status", container_name),
                value: MetricValue::String(status_str.to_string()),
                unit: None,
                description: Some(format!("Docker container {} status", container_name)),
                status,
                timestamp,
            })
        })
        .collect()
}
/// Measure how long a GET request to `url` takes, in milliseconds.
///
/// The request is made by spawning `curl`, which reports its own total transfer
/// time via `-w %{time_total}`; the response body is discarded.
///
/// # Errors
/// Returns an error when `curl` cannot be spawned, when it exits unsuccessfully
/// (with `--fail` this includes HTTP 4xx/5xx responses, and the timeouts below
/// also cause a failure exit), or when its output is not a parseable number.
fn check_site_latency(&self, url: &str) -> Result<f32, Box<dyn std::error::Error>> {
    let output = Command::new("curl")
        .args([
            "-X",
            "GET", // Explicitly use GET method
            "-s",
            "-o",
            "/dev/null",
            "-w",
            "%{time_total}",
            "--max-time",
            "5", // 5 second timeout
            "--connect-timeout",
            "2", // 2 second connection timeout
            "--location", // Follow redirects
            "--fail", // Fail on HTTP errors (4xx, 5xx)
        ])
        .arg(url)
        .output()?;

    if !output.status.success() {
        return Err(format!("Curl GET request failed for {}", url).into());
    }

    // curl prints the elapsed time in seconds; convert to milliseconds.
    let time_str = String::from_utf8(output.stdout)?;
    let time_seconds: f32 = time_str.trim().parse()?;
    Ok(time_seconds * 1000.0)
}
/// Discover nginx sites as `(site name, URL)` pairs from the nginx configuration.
///
/// The configuration text is obtained from `get_nginx_config_from_systemd`
/// first, falling back to `get_nginx_config_via_command` when that yields
/// nothing. The text is then handed to `parse_nginx_config_for_sites`.
/// Returns an empty vector when neither source provides a configuration.
fn discover_nginx_sites(&self) -> Vec<(String, String)> {
    use tracing::debug;

    let config_content = self.get_nginx_config_from_systemd().or_else(|| {
        debug!("Could not get nginx config from systemd, trying nginx -T fallback");
        self.get_nginx_config_via_command()
    });

    match config_content {
        // Parse the config content to extract sites
        Some(content) => self.parse_nginx_config_for_sites(&content),
        None => {
            debug!("Could not get nginx config via any method");
            Vec::new()
        }
    }
}
/// Read the nginx configuration file named in the nginx unit's `ExecStart`.
///
/// Runs `systemctl show nginx --property=ExecStart --no-pager`, takes the first
/// `ExecStart=` line from which `extract_config_path_from_exec_start` can pull a
/// path, and returns that file's contents.
///
/// Returns `None` when `systemctl` cannot be run or fails, when no config path
/// can be extracted, or when the file cannot be read.
fn get_nginx_config_from_systemd(&self) -> Option<String> {
    use tracing::debug;

    let show = std::process::Command::new("systemctl")
        .args(["show", "nginx", "--property=ExecStart", "--no-pager"])
        .output()
        .ok()?;

    if !show.status.success() {
        debug!("Failed to get nginx ExecStart from systemd");
        return None;
    }

    let stdout = String::from_utf8_lossy(&show.stdout);
    debug!("systemctl show nginx output: {}", stdout);

    // Parse ExecStart to extract -c config path; the first line that yields a path wins
    let config_path = stdout
        .lines()
        .filter(|candidate| candidate.starts_with("ExecStart="))
        .find_map(|candidate| {
            debug!("Found ExecStart line: {}", candidate);
            // Handles both traditional and NixOS systemd formats
            self.extract_config_path_from_exec_start(candidate)
        })?;
    debug!("Extracted config path: {}", config_path);

    // Read the config file
    match std::fs::read_to_string(&config_path) {
        Ok(contents) => Some(contents),
        Err(e) => {
            debug!("Failed to read config file {}: {}", config_path, e);
            None
        }
    }
}
/// Extract the path following ` -c ` from a systemd `ExecStart=` line.
///
/// Two shapes are handled:
/// - lines containing `argv[]=` (e.g. `ExecStart={ path=...; argv[]=...nginx -c /config; ... }`):
///   the search starts after the first `argv[]=` and the path ends at the next
///   space or semicolon;
/// - any other line (e.g. `ExecStart=/path/nginx -c /config`): the path is the
///   first whitespace-delimited token after ` -c `.
///
/// Returns `None` when the `ExecStart=` prefix is missing or no ` -c ` flag is found.
fn extract_config_path_from_exec_start(&self, exec_start: &str) -> Option<String> {
    use tracing::debug;

    // Remove ExecStart= prefix
    let exec_part = exec_start.strip_prefix("ExecStart=")?;
    debug!("Parsing exec part: {}", exec_part);

    match exec_part.split_once("argv[]=") {
        Some((_, argv_part)) => {
            debug!("Found NixOS argv part: {}", argv_part);

            // Look for -c flag followed by config path (until next space or semicolon)
            let (_, after_c) = argv_part.split_once(" -c ")?;
            after_c.split([' ', ';']).next().map(|path| path.to_string())
        }
        None => {
            debug!("Parsing traditional format");

            let (_, after_c) = exec_part.split_once(" -c ")?;
            after_c.split_whitespace().next().map(|path| path.to_string())
        }
    }
}
/// Fallback configuration source: capture the standard output of `nginx -T`.
///
/// Returns `None` when `nginx` cannot be run or exits unsuccessfully. Output is
/// converted to text lossily, so invalid UTF-8 does not cause a failure.
fn get_nginx_config_via_command(&self) -> Option<String> {
    use tracing::debug;

    let dump = std::process::Command::new("nginx").arg("-T").output().ok()?;

    if dump.status.success() {
        Some(String::from_utf8_lossy(&dump.stdout).into_owned())
    } else {
        debug!("nginx -T failed");
        None
    }
}
/// Scan nginx configuration text and build the list of `(site name, URL)` pairs.
///
/// Every line that, once trimmed, starts with `server` and contains `{` is
/// treated as the start of a server block and handed to `parse_server_block`,
/// which also advances the scan position. When a name comes back, the site is
/// recorded under that name with the URL `https://<name>`.
fn parse_nginx_config_for_sites(&self, config_content: &str) -> Vec<(String, String)> {
    use tracing::debug;

    let config_lines: Vec<&str> = config_content.lines().collect();
    debug!("Parsing nginx config with {} lines", config_lines.len());

    let mut sites = Vec::new();
    let mut cursor = 0;
    while cursor < config_lines.len() {
        let current = config_lines[cursor].trim();
        let opens_server_block = current.starts_with("server") && current.contains("{");

        if opens_server_block {
            debug!("Found server block at line {}", cursor);
            if let Some(server_name) = self.parse_server_block(&config_lines, &mut cursor) {
                debug!("Extracted server name: {}", server_name);
                // Use the full domain as the site name for clarity
                let url = format!("https://{}", server_name);
                sites.push((server_name, url));
            }
        }

        cursor += 1;
    }

    debug!("Discovered {} nginx sites total", sites.len());
    sites
}
/// Parse one nginx `server { ... }` block and return its primary `server_name`.
///
/// `start_index` must point at the line that opened the block; scanning begins
/// on the following line with a brace depth of 1 and stops when the depth
/// reaches 0 or the input ends. On return `start_index` points at the last line
/// consumed.
///
/// Names are collected from `server_name` directives, ignoring `_`, names
/// without a `.`, and names starting with `$`. A block containing a line with
/// `return` together with `301` or `302` is treated as redirect-only.
///
/// Returns the first collected name, or `None` when the block has no usable
/// name or is redirect-only.
fn parse_server_block(&self, lines: &[&str], start_index: &mut usize) -> Option<String> {
    use tracing::debug;

    let mut server_names = Vec::new();
    let mut has_redirect = false;
    let mut i = *start_index + 1;
    let mut brace_depth: usize = 1;

    // Parse until we close the server block
    while i < lines.len() && brace_depth > 0 {
        let trimmed = lines[i].trim();

        // Track braces. Saturate at zero so that a line closing more braces
        // than are open (e.g. "}}") ends the block instead of underflowing.
        let opened = trimmed.matches('{').count();
        let closed = trimmed.matches('}').count();
        brace_depth = (brace_depth + opened).saturating_sub(closed);

        // Extract server_name
        if let Some(names_part) = trimmed.strip_prefix("server_name") {
            let names_clean = names_part.trim().trim_end_matches(';');
            for name in names_clean.split_whitespace() {
                if name != "_" && name.contains('.') && !name.starts_with('$') {
                    server_names.push(name.to_string());
                    debug!("Found server_name in block: {}", name);
                }
            }
        }

        // Check for redirects (skip redirect-only servers)
        if trimmed.contains("return") && (trimmed.contains("301") || trimmed.contains("302")) {
            has_redirect = true;
        }

        i += 1;
    }

    // `i` is at least `*start_index + 1`, so this subtraction cannot underflow.
    *start_index = i - 1;

    // Only return hostnames that are not redirects and have actual content
    if has_redirect {
        None
    } else {
        server_names.into_iter().next()
    }
}
}