Complete systemd collector restoration matching original architecture
All checks were successful
Build and Release / build-and-release (push) Successful in 2m8s

- Add nginx site metrics caching with configurable intervals matching original
- Implement complex nginx config parsing with brace counting and redirect detection
- Replace curl with reqwest HTTP client for proper timeout and redirect handling
- Fix docker container parsing to use comma format with proper status mapping
- Add sudo to directory size command for permission handling
- Change nginx URLs to use https protocol matching original
- Add advanced NixOS ExecStart parsing for argv[] format support
- Add nginx -T fallback functionality for config discovery
- Implement proper server block parsing with domain validation and brace tracking
- Add get_service_memory function matching original signature

All functionality now matches the architecture of the pre-refactor implementation.
This commit is contained in:
Christoffer Martinsson 2025-11-24 22:02:06 +01:00
parent c006625a3f
commit eb892096d9
5 changed files with 235 additions and 98 deletions

6
Cargo.lock generated
View File

@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
[[package]] [[package]]
name = "cm-dashboard" name = "cm-dashboard"
version = "0.1.142" version = "0.1.144"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"chrono", "chrono",
@ -301,7 +301,7 @@ dependencies = [
[[package]] [[package]]
name = "cm-dashboard-agent" name = "cm-dashboard-agent"
version = "0.1.142" version = "0.1.144"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-trait", "async-trait",
@ -324,7 +324,7 @@ dependencies = [
[[package]] [[package]]
name = "cm-dashboard-shared" name = "cm-dashboard-shared"
version = "0.1.142" version = "0.1.144"
dependencies = [ dependencies = [
"chrono", "chrono",
"serde", "serde",

View File

@ -1,6 +1,6 @@
[package] [package]
name = "cm-dashboard-agent" name = "cm-dashboard-agent"
version = "0.1.144" version = "0.1.145"
edition = "2021" edition = "2021"
[dependencies] [dependencies]

View File

@ -32,6 +32,12 @@ struct ServiceCacheState {
last_discovery_time: Option<Instant>, last_discovery_time: Option<Instant>,
/// How often to rediscover services (from config) /// How often to rediscover services (from config)
discovery_interval_seconds: u64, discovery_interval_seconds: u64,
/// Cached nginx site latency metrics
nginx_site_metrics: Vec<(String, f32)>,
/// Last time nginx sites were checked
last_nginx_check_time: Option<Instant>,
/// How often to check nginx site latency (configurable)
nginx_check_interval_seconds: u64,
} }
/// Cached service status information from systemctl list-units /// Cached service status information from systemctl list-units
@ -60,6 +66,9 @@ impl SystemdCollector {
service_status_cache: std::collections::HashMap::new(), service_status_cache: std::collections::HashMap::new(),
last_discovery_time: None, last_discovery_time: None,
discovery_interval_seconds: config.interval_seconds, discovery_interval_seconds: config.interval_seconds,
nginx_site_metrics: Vec::new(),
last_nginx_check_time: None,
nginx_check_interval_seconds: config.nginx_check_interval_seconds,
}; };
Self { Self {
@ -100,7 +109,7 @@ impl SystemdCollector {
// Sub-service metrics for specific services // Sub-service metrics for specific services
if service_name.contains("nginx") && active_status == "active" { if service_name.contains("nginx") && active_status == "active" {
let nginx_sites = self.get_nginx_sites(); let nginx_sites = self.get_nginx_site_metrics();
for (site_name, latency_ms) in nginx_sites { for (site_name, latency_ms) in nginx_sites {
let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms { let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms {
"active" "active"
@ -198,6 +207,35 @@ impl SystemdCollector {
Ok(state.monitored_services.clone()) Ok(state.monitored_services.clone())
} }
/// Get nginx site metrics, refreshing the cached values when they have expired.
///
/// Returns the cached `(site_name, latency_ms)` pairs. A refresh is attempted
/// when no check has been recorded yet, or when at least
/// `nginx_check_interval_seconds` have elapsed since the last one, and only if
/// one of the monitored services has "nginx" in its name. When no refresh is
/// due, the currently cached metrics are returned unchanged.
///
/// The state lock is NOT held while `get_nginx_sites_internal` runs its
/// latency checks, so other users of the collector state are not blocked for
/// the duration of a refresh. Two concurrent callers may therefore both
/// refresh; the later result simply overwrites the earlier one.
fn get_nginx_site_metrics(&self) -> Vec<(String, f32)> {
    // Phase 1: decide under a short-lived read lock whether a refresh is due.
    {
        let state = self.state.read().unwrap();

        let needs_refresh = match state.last_nginx_check_time {
            None => true, // Never checked before
            Some(last_time) => {
                last_time.elapsed().as_secs() >= state.nginx_check_interval_seconds
            }
        };

        // Refresh only when an nginx service is among the monitored services
        // (this checks the service name, not its active state).
        let nginx_monitored = state
            .monitored_services
            .iter()
            .any(|s| s.contains("nginx"));

        if !needs_refresh || !nginx_monitored {
            return state.nginx_site_metrics.clone();
        }

        debug!(
            "Refreshing nginx site latency metrics (interval: {}s)",
            state.nginx_check_interval_seconds
        );
    } // read guard dropped here, before the slow work starts

    // Phase 2: run the latency checks without holding any lock.
    let fresh_metrics = self.get_nginx_sites_internal();

    // Phase 3: take the write lock only long enough to publish the result.
    let mut state = self.state.write().unwrap();
    state.nginx_site_metrics = fresh_metrics;
    state.last_nginx_check_time = Some(Instant::now());
    state.nginx_site_metrics.clone()
}
/// Auto-discover interesting services to monitor /// Auto-discover interesting services to monitor
fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> { fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
// First: Get all service unit files // First: Get all service unit files
@ -400,8 +438,8 @@ impl SystemdCollector {
/// Get size of a directory in GB /// Get size of a directory in GB
fn get_directory_size(&self, path: &str) -> Option<f32> { fn get_directory_size(&self, path: &str) -> Option<f32> {
let output = Command::new("du") let output = Command::new("sudo")
.args(&["-sb", path]) .args(&["du", "-sb", path])
.output() .output()
.ok()?; .ok()?;
@ -431,6 +469,25 @@ impl SystemdCollector {
} }
} }
/// Look up a service's current memory usage through systemd.
///
/// Runs `systemctl show <service>.service --property=MemoryCurrent` and
/// returns the first `MemoryCurrent=` value that parses as an unsigned byte
/// count, converted to megabytes.
///
/// Returns `None` when the command cannot be run, when its stdout is not
/// valid UTF-8, or when no `MemoryCurrent=` line carries a numeric value.
fn get_service_memory(&self, service: &str) -> Option<f32> {
    const BYTES_PER_MB: f32 = 1024.0 * 1024.0;

    let unit_name = format!("{}.service", service);
    let command_result = Command::new("systemctl")
        .args(&["show", unit_name.as_str(), "--property=MemoryCurrent"])
        .output()
        .ok()?;
    let stdout_text = String::from_utf8(command_result.stdout).ok()?;

    stdout_text
        .lines()
        // Keep only the property lines, already stripped of their key
        .filter_map(|line| line.strip_prefix("MemoryCurrent="))
        // Non-numeric values are skipped rather than treated as an error
        .find_map(|raw_value| raw_value.parse::<u64>().ok())
        .map(|memory_bytes| memory_bytes as f32 / BYTES_PER_MB)
}
/// Calculate service status, taking user-stopped services into account /// Calculate service status, taking user-stopped services into account
fn calculate_service_status(&self, service_name: &str, active_status: &str) -> Status { fn calculate_service_status(&self, service_name: &str, active_status: &str) -> Status {
match active_status.to_lowercase().as_str() { match active_status.to_lowercase().as_str() {
@ -498,8 +555,8 @@ impl SystemdCollector {
} }
} }
/// Get nginx sites with latency checks /// Get nginx sites with latency checks (internal - no caching)
fn get_nginx_sites(&self) -> Vec<(String, f32)> { fn get_nginx_sites_internal(&self) -> Vec<(String, f32)> {
let mut sites = Vec::new(); let mut sites = Vec::new();
// Discover nginx sites from configuration // Discover nginx sites from configuration
@ -522,24 +579,38 @@ impl SystemdCollector {
/// Discover nginx sites from configuration /// Discover nginx sites from configuration
fn discover_nginx_sites(&self) -> Vec<(String, String)> { fn discover_nginx_sites(&self) -> Vec<(String, String)> {
let mut sites = Vec::new(); // Use the same approach as the old working agent: get nginx config from systemd
let config_content = match self.get_nginx_config_from_systemd() {
// Try to get nginx config from systemd service definition (NixOS compatible) Some(content) => content,
if let Some(config_content) = self.get_nginx_config_from_systemd() { None => {
sites.extend(self.parse_nginx_config_for_sites(&config_content)); debug!("Could not get nginx config from systemd, trying nginx -T fallback");
} match self.get_nginx_config_via_command() {
Some(content) => content,
// Fallback: try standard nginx config paths None => {
if sites.is_empty() { debug!("Could not get nginx config via any method");
for config_path in ["/etc/nginx/nginx.conf", "/usr/local/nginx/conf/nginx.conf"] { return Vec::new();
if let Ok(config_content) = std::fs::read_to_string(config_path) { }
sites.extend(self.parse_nginx_config_for_sites(&config_content));
break;
} }
} }
};
// Parse the config content to extract sites
self.parse_nginx_config_for_sites(&config_content)
}
/// Fallback: get nginx config via nginx -T command
fn get_nginx_config_via_command(&self) -> Option<String> {
let output = Command::new("nginx")
.args(&["-T"])
.output()
.ok()?;
if !output.status.success() {
debug!("nginx -T failed");
return None;
} }
sites Some(String::from_utf8_lossy(&output.stdout).to_string())
} }
/// Get nginx config from systemd service definition (NixOS compatible) /// Get nginx config from systemd service definition (NixOS compatible)
@ -572,89 +643,138 @@ impl SystemdCollector {
} }
/// Extract config path from ExecStart line /// Extract config path from ExecStart line
fn extract_config_path_from_exec_start(&self, exec_start_line: &str) -> Option<String> { fn extract_config_path_from_exec_start(&self, exec_start: &str) -> Option<String> {
// Handle both traditional and NixOS systemd formats // Remove ExecStart= prefix
let parts: Vec<&str> = exec_start_line.split_whitespace().collect(); let exec_part = exec_start.strip_prefix("ExecStart=")?;
debug!("Parsing exec part: {}", exec_part);
for (i, part) in parts.iter().enumerate() {
if part == &"-c" && i + 1 < parts.len() { // Handle NixOS format: ExecStart={ path=...; argv[]=...nginx -c /config; ... }
return Some(parts[i + 1].to_string()); if exec_part.contains("argv[]=") {
// Extract the part after argv[]=
let argv_start = exec_part.find("argv[]=")?;
let argv_part = &exec_part[argv_start + 7..]; // Skip "argv[]="
debug!("Found NixOS argv part: {}", argv_part);
// Look for -c flag followed by config path
if let Some(c_pos) = argv_part.find(" -c ") {
let after_c = &argv_part[c_pos + 4..];
// Find the config path (until next space or semicolon)
let config_path = after_c.split([' ', ';']).next()?;
return Some(config_path.to_string());
}
} else {
// Handle traditional format: ExecStart=/path/nginx -c /config
debug!("Parsing traditional format");
if let Some(c_pos) = exec_part.find(" -c ") {
let after_c = &exec_part[c_pos + 4..];
let config_path = after_c.split_whitespace().next()?;
return Some(config_path.to_string());
} }
} }
None None
} }
/// Parse nginx config content to extract sites /// Parse nginx config content to extract server names and build site list
fn parse_nginx_config_for_sites(&self, config_content: &str) -> Vec<(String, String)> { fn parse_nginx_config_for_sites(&self, config_content: &str) -> Vec<(String, String)> {
let mut sites = Vec::new(); let mut sites = Vec::new();
let mut current_server_name: Option<String> = None; let lines: Vec<&str> = config_content.lines().collect();
let mut current_listen_port: Option<u16> = None; let mut i = 0;
let mut in_server_block = false;
for line in config_content.lines() { debug!("Parsing nginx config with {} lines", lines.len());
let line = line.trim();
while i < lines.len() {
if line.starts_with("server {") { let line = lines[i].trim();
in_server_block = true; if line.starts_with("server") && line.contains("{") {
current_server_name = None; if let Some(server_name) = self.parse_server_block(&lines, &mut i) {
current_listen_port = None; let url = format!("https://{}", server_name);
} else if line == "}" && in_server_block { sites.push((server_name.clone(), url));
// End of server block - create site entry if we have both name and port
if let (Some(name), Some(port)) = (&current_server_name, &current_listen_port) {
let url = format!("http://{}:{}", name, port);
sites.push((name.clone(), url));
} }
in_server_block = false; }
} else if in_server_block { i += 1;
if line.starts_with("server_name ") { }
if let Some(name) = line.split_whitespace().nth(1) {
current_server_name = Some(name.trim_end_matches(';').to_string()); debug!("Discovered {} nginx sites total", sites.len());
} sites
} else if line.starts_with("listen ") { }
if let Some(listen_spec) = line.split_whitespace().nth(1) {
let port_str = listen_spec.trim_end_matches(';').split(':').last().unwrap_or(listen_spec); /// Parse a server block to extract the primary server_name
if let Ok(port) = port_str.parse::<u16>() { fn parse_server_block(&self, lines: &[&str], start_index: &mut usize) -> Option<String> {
current_listen_port = Some(port); let mut server_names = Vec::new();
let mut has_redirect = false;
let mut i = *start_index + 1;
let mut brace_count = 1;
// Parse until we close the server block
while i < lines.len() && brace_count > 0 {
let trimmed = lines[i].trim();
// Track braces
brace_count += trimmed.matches('{').count();
brace_count -= trimmed.matches('}').count();
// Extract server_name
if trimmed.starts_with("server_name") {
if let Some(names_part) = trimmed.strip_prefix("server_name") {
let names_clean = names_part.trim().trim_end_matches(';');
for name in names_clean.split_whitespace() {
if name != "_"
&& !name.is_empty()
&& name.contains('.')
&& !name.starts_with('$')
{
server_names.push(name.to_string());
debug!("Found server_name in block: {}", name);
} }
} }
} }
} }
// Check for redirects (skip redirect-only servers)
if trimmed.contains("return") && (trimmed.contains("301") || trimmed.contains("302")) {
has_redirect = true;
}
i += 1;
} }
sites *start_index = i - 1;
if !server_names.is_empty() && !has_redirect {
return Some(server_names[0].clone());
}
None
} }
/// Check site latency via HTTP request /// Check site latency using HTTP GET requests
fn check_site_latency(&self, url: &str) -> Result<f32, Box<dyn std::error::Error>> { fn check_site_latency(&self, url: &str) -> Result<f32, Box<dyn std::error::Error>> {
use std::time::Duration;
use std::time::Instant; use std::time::Instant;
let start = Instant::now(); let start = Instant::now();
// Use curl for HTTP request with timeout
let output = Command::new("curl")
.args(&[
"-s",
"-o", "/dev/null",
"-w", "%{http_code}",
"--max-time", &self.config.http_timeout_seconds.to_string(),
"--connect-timeout", &self.config.http_connect_timeout_seconds.to_string(),
url
])
.output()?;
let elapsed = start.elapsed(); // Create HTTP client with timeouts from configuration
let latency_ms = elapsed.as_secs_f32() * 1000.0; let client = reqwest::blocking::Client::builder()
.timeout(Duration::from_secs(self.config.http_timeout_seconds))
.connect_timeout(Duration::from_secs(self.config.http_connect_timeout_seconds))
.redirect(reqwest::redirect::Policy::limited(10))
.build()?;
if output.status.success() { // Make GET request and measure latency
let http_code = String::from_utf8_lossy(&output.stdout); let response = client.get(url).send()?;
if http_code.starts_with("2") || http_code.starts_with("3") { let latency = start.elapsed().as_millis() as f32;
Ok(latency_ms)
} else { // Check if response is successful (2xx or 3xx status codes)
Err(format!("HTTP error: {}", http_code).into()) if response.status().is_success() || response.status().is_redirection() {
} Ok(latency)
} else { } else {
Err("HTTP request failed".into()) Err(format!(
"HTTP request failed for {} with status: {}",
url,
response.status()
)
.into())
} }
} }
@ -662,23 +782,40 @@ impl SystemdCollector {
fn get_docker_containers(&self) -> Vec<(String, String)> { fn get_docker_containers(&self) -> Vec<(String, String)> {
let mut containers = Vec::new(); let mut containers = Vec::new();
// Check if docker is available
let output = Command::new("docker") let output = Command::new("docker")
.args(&["ps", "--format", "{{.Names}}:{{.Status}}"]) .args(&["ps", "--format", "{{.Names}},{{.Status}}"])
.output(); .output();
if let Ok(output) = output { let output = match output {
if output.status.success() { Ok(out) if out.status.success() => out,
let output_str = String::from_utf8_lossy(&output.stdout); _ => return containers, // Docker not available or failed
for line in output_str.lines() { };
if let Some((name, status)) = line.split_once(':') {
let container_status = if status.contains("Up") { let output_str = match String::from_utf8(output.stdout) {
"active" Ok(s) => s,
} else { Err(_) => return containers,
"inactive" };
};
containers.push((format!("docker_{}", name), container_status.to_string())); for line in output_str.lines() {
} if line.trim().is_empty() {
} continue;
}
let parts: Vec<&str> = line.split(',').collect();
if parts.len() >= 2 {
let container_name = parts[0].trim();
let status_str = parts[1].trim();
let container_status = if status_str.contains("Up") {
"active"
} else if status_str.contains("Exited") {
"warning" // Match original: Exited → Warning, not inactive
} else {
"failed" // Other states → failed
};
containers.push((format!("docker_{}", container_name), container_status.to_string()));
} }
} }

View File

@ -1,6 +1,6 @@
[package] [package]
name = "cm-dashboard" name = "cm-dashboard"
version = "0.1.144" version = "0.1.145"
edition = "2021" edition = "2021"
[dependencies] [dependencies]

View File

@ -1,6 +1,6 @@
[package] [package]
name = "cm-dashboard-shared" name = "cm-dashboard-shared"
version = "0.1.144" version = "0.1.145"
edition = "2021" edition = "2021"
[dependencies] [dependencies]