diff --git a/Cargo.lock b/Cargo.lock index 9140432..11a2738 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "cm-dashboard" -version = "0.1.142" +version = "0.1.144" dependencies = [ "anyhow", "chrono", @@ -301,7 +301,7 @@ dependencies = [ [[package]] name = "cm-dashboard-agent" -version = "0.1.142" +version = "0.1.144" dependencies = [ "anyhow", "async-trait", @@ -324,7 +324,7 @@ dependencies = [ [[package]] name = "cm-dashboard-shared" -version = "0.1.142" +version = "0.1.144" dependencies = [ "chrono", "serde", diff --git a/agent/Cargo.toml b/agent/Cargo.toml index 2d834a7..836e962 100644 --- a/agent/Cargo.toml +++ b/agent/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard-agent" -version = "0.1.144" +version = "0.1.145" edition = "2021" [dependencies] diff --git a/agent/src/collectors/systemd.rs b/agent/src/collectors/systemd.rs index cd9c9b8..c2a6ef7 100644 --- a/agent/src/collectors/systemd.rs +++ b/agent/src/collectors/systemd.rs @@ -32,6 +32,12 @@ struct ServiceCacheState { last_discovery_time: Option, /// How often to rediscover services (from config) discovery_interval_seconds: u64, + /// Cached nginx site latency metrics + nginx_site_metrics: Vec<(String, f32)>, + /// Last time nginx sites were checked + last_nginx_check_time: Option, + /// How often to check nginx site latency (configurable) + nginx_check_interval_seconds: u64, } /// Cached service status information from systemctl list-units @@ -60,6 +66,9 @@ impl SystemdCollector { service_status_cache: std::collections::HashMap::new(), last_discovery_time: None, discovery_interval_seconds: config.interval_seconds, + nginx_site_metrics: Vec::new(), + last_nginx_check_time: None, + nginx_check_interval_seconds: config.nginx_check_interval_seconds, }; Self { @@ -100,7 +109,7 @@ impl SystemdCollector { // Sub-service metrics for specific services if service_name.contains("nginx") && active_status == "active" { - let nginx_sites = self.get_nginx_sites(); + let nginx_sites = self.get_nginx_site_metrics(); for (site_name, latency_ms) in nginx_sites { let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms { "active" @@ -198,6 +207,35 @@ impl SystemdCollector { Ok(state.monitored_services.clone()) } + /// Get nginx site metrics, checking them if cache is expired + fn get_nginx_site_metrics(&self) -> Vec<(String, f32)> { + let mut state = self.state.write().unwrap(); + + // Check if we need to refresh nginx site metrics + let needs_refresh = match state.last_nginx_check_time { + None => true, // First time + Some(last_time) => { + let elapsed = last_time.elapsed().as_secs(); + elapsed >= state.nginx_check_interval_seconds + } + }; + + if needs_refresh { + // Only check nginx sites if nginx service is active + if state.monitored_services.iter().any(|s| s.contains("nginx")) { + debug!( + "Refreshing nginx site latency metrics (interval: {}s)", + state.nginx_check_interval_seconds + ); + let fresh_metrics = self.get_nginx_sites_internal(); + state.nginx_site_metrics = fresh_metrics; + state.last_nginx_check_time = Some(Instant::now()); + } + } + + state.nginx_site_metrics.clone() + } + /// Auto-discover interesting services to monitor fn discover_services_internal(&self) -> Result<(Vec, std::collections::HashMap)> { // First: Get all service unit files @@ -400,8 +438,8 @@ impl SystemdCollector { /// Get size of a directory in GB fn get_directory_size(&self, path: &str) -> Option { - let output = Command::new("du") - .args(&["-sb", path]) + let output = Command::new("sudo") + .args(&["du", "-sb", path]) .output() .ok()?; @@ -431,6 +469,25 @@ impl SystemdCollector { } } + /// Get service memory usage (if available) + fn get_service_memory(&self, service: &str) -> Option { + let output = Command::new("systemctl") + .args(&["show", &format!("{}.service", service), "--property=MemoryCurrent"]) + .output() + .ok()?; + + let output_str = String::from_utf8(output.stdout).ok()?; + for line in output_str.lines() { + if line.starts_with("MemoryCurrent=") { + let memory_str = line.strip_prefix("MemoryCurrent=")?; + if let Ok(memory_bytes) = memory_str.parse::() { + return Some(memory_bytes as f32 / (1024.0 * 1024.0)); // Convert to MB + } + } + } + None + } + /// Calculate service status, taking user-stopped services into account fn calculate_service_status(&self, service_name: &str, active_status: &str) -> Status { match active_status.to_lowercase().as_str() { @@ -498,8 +555,8 @@ impl SystemdCollector { } } - /// Get nginx sites with latency checks - fn get_nginx_sites(&self) -> Vec<(String, f32)> { + /// Get nginx sites with latency checks (internal - no caching) + fn get_nginx_sites_internal(&self) -> Vec<(String, f32)> { let mut sites = Vec::new(); // Discover nginx sites from configuration @@ -522,24 +579,38 @@ impl SystemdCollector { /// Discover nginx sites from configuration fn discover_nginx_sites(&self) -> Vec<(String, String)> { - let mut sites = Vec::new(); - - // Try to get nginx config from systemd service definition (NixOS compatible) - if let Some(config_content) = self.get_nginx_config_from_systemd() { - sites.extend(self.parse_nginx_config_for_sites(&config_content)); - } - - // Fallback: try standard nginx config paths - if sites.is_empty() { - for config_path in ["/etc/nginx/nginx.conf", "/usr/local/nginx/conf/nginx.conf"] { - if let Ok(config_content) = std::fs::read_to_string(config_path) { - sites.extend(self.parse_nginx_config_for_sites(&config_content)); - break; + // Use the same approach as the old working agent: get nginx config from systemd + let config_content = match self.get_nginx_config_from_systemd() { + Some(content) => content, + None => { + debug!("Could not get nginx config from systemd, trying nginx -T fallback"); + match self.get_nginx_config_via_command() { + Some(content) => content, + None => { + debug!("Could not get nginx config via any method"); + return Vec::new(); + } } } + }; + + // Parse the config content to extract sites + self.parse_nginx_config_for_sites(&config_content) + } + + /// Fallback: get nginx config via nginx -T command + fn get_nginx_config_via_command(&self) -> Option { + let output = Command::new("nginx") + .args(&["-T"]) + .output() + .ok()?; + + if !output.status.success() { + debug!("nginx -T failed"); + return None; } - sites + Some(String::from_utf8_lossy(&output.stdout).to_string()) } /// Get nginx config from systemd service definition (NixOS compatible) @@ -572,89 +643,138 @@ impl SystemdCollector { } /// Extract config path from ExecStart line - fn extract_config_path_from_exec_start(&self, exec_start_line: &str) -> Option { - // Handle both traditional and NixOS systemd formats - let parts: Vec<&str> = exec_start_line.split_whitespace().collect(); - - for (i, part) in parts.iter().enumerate() { - if part == &"-c" && i + 1 < parts.len() { - return Some(parts[i + 1].to_string()); + fn extract_config_path_from_exec_start(&self, exec_start: &str) -> Option { + // Remove ExecStart= prefix + let exec_part = exec_start.strip_prefix("ExecStart=")?; + debug!("Parsing exec part: {}", exec_part); + + // Handle NixOS format: ExecStart={ path=...; argv[]=...nginx -c /config; ... } + if exec_part.contains("argv[]=") { + // Extract the part after argv[]= + let argv_start = exec_part.find("argv[]=")?; + let argv_part = &exec_part[argv_start + 7..]; // Skip "argv[]=" + debug!("Found NixOS argv part: {}", argv_part); + + // Look for -c flag followed by config path + if let Some(c_pos) = argv_part.find(" -c ") { + let after_c = &argv_part[c_pos + 4..]; + // Find the config path (until next space or semicolon) + let config_path = after_c.split([' ', ';']).next()?; + return Some(config_path.to_string()); + } + } else { + // Handle traditional format: ExecStart=/path/nginx -c /config + debug!("Parsing traditional format"); + if let Some(c_pos) = exec_part.find(" -c ") { + let after_c = &exec_part[c_pos + 4..]; + let config_path = after_c.split_whitespace().next()?; + return Some(config_path.to_string()); } } - + None } - /// Parse nginx config content to extract sites + /// Parse nginx config content to extract server names and build site list fn parse_nginx_config_for_sites(&self, config_content: &str) -> Vec<(String, String)> { let mut sites = Vec::new(); - let mut current_server_name: Option = None; - let mut current_listen_port: Option = None; - let mut in_server_block = false; + let lines: Vec<&str> = config_content.lines().collect(); + let mut i = 0; - for line in config_content.lines() { - let line = line.trim(); - - if line.starts_with("server {") { - in_server_block = true; - current_server_name = None; - current_listen_port = None; - } else if line == "}" && in_server_block { - // End of server block - create site entry if we have both name and port - if let (Some(name), Some(port)) = (¤t_server_name, ¤t_listen_port) { - let url = format!("http://{}:{}", name, port); - sites.push((name.clone(), url)); + debug!("Parsing nginx config with {} lines", lines.len()); + + while i < lines.len() { + let line = lines[i].trim(); + if line.starts_with("server") && line.contains("{") { + if let Some(server_name) = self.parse_server_block(&lines, &mut i) { + let url = format!("https://{}", server_name); + sites.push((server_name.clone(), url)); } - in_server_block = false; - } else if in_server_block { - if line.starts_with("server_name ") { - if let Some(name) = line.split_whitespace().nth(1) { - current_server_name = Some(name.trim_end_matches(';').to_string()); - } - } else if line.starts_with("listen ") { - if let Some(listen_spec) = line.split_whitespace().nth(1) { - let port_str = listen_spec.trim_end_matches(';').split(':').last().unwrap_or(listen_spec); - if let Ok(port) = port_str.parse::() { - current_listen_port = Some(port); + } + i += 1; + } + + debug!("Discovered {} nginx sites total", sites.len()); + sites + } + + /// Parse a server block to extract the primary server_name + fn parse_server_block(&self, lines: &[&str], start_index: &mut usize) -> Option { + let mut server_names = Vec::new(); + let mut has_redirect = false; + let mut i = *start_index + 1; + let mut brace_count = 1; + + // Parse until we close the server block + while i < lines.len() && brace_count > 0 { + let trimmed = lines[i].trim(); + + // Track braces + brace_count += trimmed.matches('{').count(); + brace_count -= trimmed.matches('}').count(); + + // Extract server_name + if trimmed.starts_with("server_name") { + if let Some(names_part) = trimmed.strip_prefix("server_name") { + let names_clean = names_part.trim().trim_end_matches(';'); + for name in names_clean.split_whitespace() { + if name != "_" + && !name.is_empty() + && name.contains('.') + && !name.starts_with('$') + { + server_names.push(name.to_string()); + debug!("Found server_name in block: {}", name); } } } } + + // Check for redirects (skip redirect-only servers) + if trimmed.contains("return") && (trimmed.contains("301") || trimmed.contains("302")) { + has_redirect = true; + } + + i += 1; } - sites + *start_index = i - 1; + + if !server_names.is_empty() && !has_redirect { + return Some(server_names[0].clone()); + } + + None } - /// Check site latency via HTTP request + /// Check site latency using HTTP GET requests fn check_site_latency(&self, url: &str) -> Result> { + use std::time::Duration; use std::time::Instant; let start = Instant::now(); - - // Use curl for HTTP request with timeout - let output = Command::new("curl") - .args(&[ - "-s", - "-o", "/dev/null", - "-w", "%{http_code}", - "--max-time", &self.config.http_timeout_seconds.to_string(), - "--connect-timeout", &self.config.http_connect_timeout_seconds.to_string(), - url - ]) - .output()?; - let elapsed = start.elapsed(); - let latency_ms = elapsed.as_secs_f32() * 1000.0; + // Create HTTP client with timeouts from configuration + let client = reqwest::blocking::Client::builder() + .timeout(Duration::from_secs(self.config.http_timeout_seconds)) + .connect_timeout(Duration::from_secs(self.config.http_connect_timeout_seconds)) + .redirect(reqwest::redirect::Policy::limited(10)) + .build()?; - if output.status.success() { - let http_code = String::from_utf8_lossy(&output.stdout); - if http_code.starts_with("2") || http_code.starts_with("3") { - Ok(latency_ms) - } else { - Err(format!("HTTP error: {}", http_code).into()) - } + // Make GET request and measure latency + let response = client.get(url).send()?; + let latency = start.elapsed().as_millis() as f32; + + // Check if response is successful (2xx or 3xx status codes) + if response.status().is_success() || response.status().is_redirection() { + Ok(latency) } else { - Err("HTTP request failed".into()) + Err(format!( + "HTTP request failed for {} with status: {}", + url, + response.status() + ) + .into()) } } @@ -662,23 +782,40 @@ impl SystemdCollector { fn get_docker_containers(&self) -> Vec<(String, String)> { let mut containers = Vec::new(); + // Check if docker is available let output = Command::new("docker") - .args(&["ps", "--format", "{{.Names}}:{{.Status}}"]) + .args(&["ps", "--format", "{{.Names}},{{.Status}}"]) .output(); - if let Ok(output) = output { - if output.status.success() { - let output_str = String::from_utf8_lossy(&output.stdout); - for line in output_str.lines() { - if let Some((name, status)) = line.split_once(':') { - let container_status = if status.contains("Up") { - "active" - } else { - "inactive" - }; - containers.push((format!("docker_{}", name), container_status.to_string())); - } - } + let output = match output { + Ok(out) if out.status.success() => out, + _ => return containers, // Docker not available or failed + }; + + let output_str = match String::from_utf8(output.stdout) { + Ok(s) => s, + Err(_) => return containers, + }; + + for line in output_str.lines() { + if line.trim().is_empty() { + continue; + } + + let parts: Vec<&str> = line.split(',').collect(); + if parts.len() >= 2 { + let container_name = parts[0].trim(); + let status_str = parts[1].trim(); + + let container_status = if status_str.contains("Up") { + "active" + } else if status_str.contains("Exited") { + "warning" // Match original: Exited → Warning, not inactive + } else { + "failed" // Other states → failed + }; + + containers.push((format!("docker_{}", container_name), container_status.to_string())); } } diff --git a/dashboard/Cargo.toml b/dashboard/Cargo.toml index 7151674..feb50c5 100644 --- a/dashboard/Cargo.toml +++ b/dashboard/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard" -version = "0.1.144" +version = "0.1.145" edition = "2021" [dependencies] diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 5b34e97..5f69677 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard-shared" -version = "0.1.144" +version = "0.1.145" edition = "2021" [dependencies]