From c006625a3f6a86fcac0c5196d73a6d5b2df9b1d9 Mon Sep 17 00:00:00 2001 From: Christoffer Martinsson Date: Mon, 24 Nov 2025 21:51:42 +0100 Subject: [PATCH] Restore complete systemd collector functionality - Enhanced directory size logic with minimum 0.001GB visibility and permission error logging - Added nginx site monitoring with latency checks and NixOS config discovery - Added docker container monitoring as sub-services - Integrated sub-service collection for active nginx and docker services - All missing features from original implementation now restored --- agent/Cargo.toml | 2 +- agent/src/collectors/systemd.rs | 247 ++++++++++++++++++++++++++++++-- dashboard/Cargo.toml | 2 +- shared/Cargo.toml | 2 +- 4 files changed, 241 insertions(+), 12 deletions(-) diff --git a/agent/Cargo.toml b/agent/Cargo.toml index e79de48..2d834a7 100644 --- a/agent/Cargo.toml +++ b/agent/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard-agent" -version = "0.1.143" +version = "0.1.144" edition = "2021" [dependencies] diff --git a/agent/src/collectors/systemd.rs b/agent/src/collectors/systemd.rs index 736fa9e..cd9c9b8 100644 --- a/agent/src/collectors/systemd.rs +++ b/agent/src/collectors/systemd.rs @@ -92,11 +92,42 @@ impl SystemdCollector { let service_info = ServiceInfo { name: service_name.clone(), - status: active_status, + status: active_status.clone(), memory_mb, disk_gb, }; services.push(service_info); + + // Sub-service metrics for specific services + if service_name.contains("nginx") && active_status == "active" { + let nginx_sites = self.get_nginx_sites(); + for (site_name, latency_ms) in nginx_sites { + let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms { + "active" + } else { + "failed" + }; + + services.push(ServiceInfo { + name: site_name, + status: site_status.to_string(), + memory_mb: 0.0, + disk_gb: latency_ms / 1000.0, // Store latency in disk_gb field as workaround + }); + } + } + + if service_name.contains("docker") 
&& active_status == "active" { + let docker_containers = self.get_docker_containers(); + for (container_name, container_status) in docker_containers { + services.push(ServiceInfo { + name: container_name, + status: container_status, + memory_mb: 0.0, + disk_gb: 0.0, + }); + } + } } Err(e) => { debug!("Failed to get status for service {}: {}", service_name, e); @@ -358,7 +389,7 @@ impl SystemdCollector { for line in output_str.lines() { if line.starts_with("WorkingDirectory=") && !line.contains("[not set]") { let dir = line.strip_prefix("WorkingDirectory=").unwrap_or(""); - if !dir.is_empty() { + if !dir.is_empty() && dir != "/" { return Ok(self.get_directory_size(dir).unwrap_or(0.0)); } } @@ -375,18 +406,29 @@ impl SystemdCollector { .ok()?; if !output.status.success() { + // Log permission errors for debugging but don't spam logs + let stderr = String::from_utf8_lossy(&output.stderr); + if stderr.contains("Permission denied") { + debug!("Permission denied accessing directory: {}", path); + } else { + debug!("Failed to get size for directory {}: {}", path, stderr); + } return None; } - let output_str = String::from_utf8_lossy(&output.stdout); - let parts: Vec<&str> = output_str.split_whitespace().collect(); - if let Some(size_str) = parts.first() { - if let Ok(size_bytes) = size_str.parse::() { - return Some(size_bytes as f32 / (1024.0 * 1024.0 * 1024.0)); + let output_str = String::from_utf8(output.stdout).ok()?; + let size_str = output_str.split_whitespace().next()?; + if let Ok(size_bytes) = size_str.parse::() { + let size_gb = size_bytes as f32 / (1024.0 * 1024.0 * 1024.0); + // Return size even if very small (minimum 0.001 GB = 1MB for visibility) + if size_gb > 0.0 { + Some(size_gb.max(0.001)) + } else { + None } + } else { + None } - - None } /// Calculate service status, taking user-stopped services into account @@ -455,6 +497,193 @@ impl SystemdCollector { None } } + + /// Get nginx sites with latency checks + fn get_nginx_sites(&self) -> Vec<(String, 
f32)> { + let mut sites = Vec::new(); + + // Discover nginx sites from configuration + let discovered_sites = self.discover_nginx_sites(); + + for (site_name, url) in &discovered_sites { + match self.check_site_latency(url) { + Ok(latency_ms) => { + sites.push((format!("nginx_{}", site_name), latency_ms)); + } + Err(_) => { + // Site is unreachable - use -1.0 to indicate error + sites.push((format!("nginx_{}", site_name), -1.0)); + } + } + } + + sites + } + + /// Discover nginx sites from configuration + fn discover_nginx_sites(&self) -> Vec<(String, String)> { + let mut sites = Vec::new(); + + // Try to get nginx config from systemd service definition (NixOS compatible) + if let Some(config_content) = self.get_nginx_config_from_systemd() { + sites.extend(self.parse_nginx_config_for_sites(&config_content)); + } + + // Fallback: try standard nginx config paths + if sites.is_empty() { + for config_path in ["/etc/nginx/nginx.conf", "/usr/local/nginx/conf/nginx.conf"] { + if let Ok(config_content) = std::fs::read_to_string(config_path) { + sites.extend(self.parse_nginx_config_for_sites(&config_content)); + break; + } + } + } + + sites + } + + /// Get nginx config from systemd service definition (NixOS compatible) + fn get_nginx_config_from_systemd(&self) -> Option<String> { + let output = Command::new("systemctl") + .args(&["show", "nginx", "--property=ExecStart", "--no-pager"]) + .output() + .ok()?; + + if !output.status.success() { + debug!("Failed to get nginx ExecStart from systemd"); + return None; + } + + let stdout = String::from_utf8_lossy(&output.stdout); + debug!("systemctl show nginx output: {}", stdout); + + // Parse ExecStart to extract -c config path + for line in stdout.lines() { + if line.starts_with("ExecStart=") { + debug!("Found ExecStart line: {}", line); + if let Some(config_path) = self.extract_config_path_from_exec_start(line) { + debug!("Extracted config path: {}", config_path); + return std::fs::read_to_string(&config_path).ok(); + } + } + } + + None + 
} + + /// Extract config path from ExecStart line + fn extract_config_path_from_exec_start(&self, exec_start_line: &str) -> Option<String> { + // Handle both traditional and NixOS systemd formats + let parts: Vec<&str> = exec_start_line.split_whitespace().collect(); + + for (i, part) in parts.iter().enumerate() { + if part == &"-c" && i + 1 < parts.len() { + return Some(parts[i + 1].to_string()); + } + } + + None + } + + /// Parse nginx config content to extract sites + fn parse_nginx_config_for_sites(&self, config_content: &str) -> Vec<(String, String)> { + let mut sites = Vec::new(); + let mut current_server_name: Option<String> = None; + let mut current_listen_port: Option<u16> = None; + let mut in_server_block = false; + + for line in config_content.lines() { + let line = line.trim(); + + if line.starts_with("server {") { + in_server_block = true; + current_server_name = None; + current_listen_port = None; + } else if line == "}" && in_server_block { + // End of server block - create site entry if we have both name and port + if let (Some(name), Some(port)) = (&current_server_name, &current_listen_port) { + let url = format!("http://{}:{}", name, port); + sites.push((name.clone(), url)); + } + in_server_block = false; + } else if in_server_block { + if line.starts_with("server_name ") { + if let Some(name) = line.split_whitespace().nth(1) { + current_server_name = Some(name.trim_end_matches(';').to_string()); + } + } else if line.starts_with("listen ") { + if let Some(listen_spec) = line.split_whitespace().nth(1) { + let port_str = listen_spec.trim_end_matches(';').split(':').last().unwrap_or(listen_spec); + if let Ok(port) = port_str.parse::<u16>() { + current_listen_port = Some(port); + } + } + } + } + } + + sites + } + + /// Check site latency via HTTP request + fn check_site_latency(&self, url: &str) -> Result<f32, Box<dyn std::error::Error>> { + use std::time::Instant; + + let start = Instant::now(); + + // Use curl for HTTP request with timeout + let output = Command::new("curl") + .args(&[ + "-s", + "-o", "/dev/null", + 
"-w", "%{http_code}", + "--max-time", &self.config.http_timeout_seconds.to_string(), + "--connect-timeout", &self.config.http_connect_timeout_seconds.to_string(), + url + ]) + .output()?; + + let elapsed = start.elapsed(); + let latency_ms = elapsed.as_secs_f32() * 1000.0; + + if output.status.success() { + let http_code = String::from_utf8_lossy(&output.stdout); + if http_code.starts_with("2") || http_code.starts_with("3") { + Ok(latency_ms) + } else { + Err(format!("HTTP error: {}", http_code).into()) + } + } else { + Err("HTTP request failed".into()) + } + } + + /// Get docker containers as sub-services + fn get_docker_containers(&self) -> Vec<(String, String)> { + let mut containers = Vec::new(); + + let output = Command::new("docker") + .args(&["ps", "--format", "{{.Names}}:{{.Status}}"]) + .output(); + + if let Ok(output) = output { + if output.status.success() { + let output_str = String::from_utf8_lossy(&output.stdout); + for line in output_str.lines() { + if let Some((name, status)) = line.split_once(':') { + let container_status = if status.contains("Up") { + "active" + } else { + "inactive" + }; + containers.push((format!("docker_{}", name), container_status.to_string())); + } + } + } + } + + containers + } } #[async_trait] diff --git a/dashboard/Cargo.toml b/dashboard/Cargo.toml index 1ef04dd..7151674 100644 --- a/dashboard/Cargo.toml +++ b/dashboard/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard" -version = "0.1.143" +version = "0.1.144" edition = "2021" [dependencies] diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 23f6781..5b34e97 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard-shared" -version = "0.1.143" +version = "0.1.144" edition = "2021" [dependencies]