From eab3f17428eefb1da04823a4dbf7fcda8fd2fabf Mon Sep 17 00:00:00 2001 From: Christoffer Martinsson Date: Fri, 28 Nov 2025 11:57:31 +0100 Subject: [PATCH] Fix agent hang by reverting service discovery to systemctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The D-Bus ListUnits call in discover_services_internal() was causing the agent to hang on startup. **Root cause:** - D-Bus ListUnits call with complex tuple destructuring hung indefinitely - Agent never completed first collection cycle - No collector output in logs **Fix:** - Revert discover_services_internal() to use systemctl list-units/list-unit-files - Keep D-Bus-based property queries (WorkingDirectory, MemoryCurrent, ExecStart) - Hybrid approach: systemctl for discovery, D-Bus for individual queries **External commands still used:** - systemctl list-units, list-unit-files (service discovery) - smartctl (SMART data) - sudo du (directory sizes) - nginx -T (config fallback) Version bump: 0.1.198 → 0.1.199 --- Cargo.lock | 6 +-- agent/Cargo.toml | 2 +- agent/src/collectors/systemd.rs | 84 +++++++++++++++++++++------------ dashboard/Cargo.toml | 2 +- shared/Cargo.toml | 2 +- 5 files changed, 61 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 818b66e..5a472ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -493,7 +493,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "cm-dashboard" -version = "0.1.198" +version = "0.1.199" dependencies = [ "anyhow", "chrono", @@ -515,7 +515,7 @@ dependencies = [ [[package]] name = "cm-dashboard-agent" -version = "0.1.198" +version = "0.1.199" dependencies = [ "anyhow", "async-trait", @@ -545,7 +545,7 @@ dependencies = [ [[package]] name = "cm-dashboard-shared" -version = "0.1.198" +version = "0.1.199" dependencies = [ "chrono", "serde", diff --git a/agent/Cargo.toml b/agent/Cargo.toml index 07fa8cc..d99bcb9 100644 --- a/agent/Cargo.toml +++ b/agent/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard-agent" -version = "0.1.198" +version = "0.1.199" edition = "2021" [dependencies] diff --git a/agent/src/collectors/systemd.rs b/agent/src/collectors/systemd.rs index f1e17b9..2cd425d 100644 --- a/agent/src/collectors/systemd.rs +++ b/agent/src/collectors/systemd.rs @@ -263,45 +263,71 @@ impl SystemdCollector { state.nginx_site_metrics.clone() } - /// Auto-discover interesting services to monitor using D-Bus + /// Auto-discover interesting services to monitor using systemctl async fn discover_services_internal(&self) -> Result<(Vec, std::collections::HashMap)> { - // Connect to system D-Bus - let connection = Connection::system().await?; + // First: Get all service unit files (with 3 second timeout) + let unit_files_output = Command::new("timeout") + .args(&["3", "systemctl", "list-unit-files", "--type=service", "--no-pager", "--plain"]) + .output()?; - // Get systemd manager proxy - let proxy = zbus::Proxy::new( - &connection, - "org.freedesktop.systemd1", - "/org/freedesktop/systemd1", - "org.freedesktop.systemd1.Manager", - ).await?; + if !unit_files_output.status.success() { + return Err(anyhow::anyhow!("systemctl list-unit-files command failed")); + } - // List all units via D-Bus - let units: Vec<(String, String, String, String, String, String, zbus::zvariant::OwnedObjectPath, u32, String, zbus::zvariant::OwnedObjectPath)> = - proxy.call("ListUnits", &()).await?; + // Second: Get runtime status of all units (with 3 second timeout) + let units_status_output = Command::new("timeout") + .args(&["3", "systemctl", "list-units", "--type=service", "--all", "--no-pager", "--plain"]) + .output()?; + if !units_status_output.status.success() { + return Err(anyhow::anyhow!("systemctl list-units command failed")); + } + + let unit_files_str = String::from_utf8(unit_files_output.stdout)?; + let units_status_str = String::from_utf8(units_status_output.stdout)?; + let mut services = Vec::new(); + + let excluded_services = &self.config.excluded_services; + let service_name_filters = &self.config.service_name_filters; + + // Parse all service unit files let mut all_service_names = std::collections::HashSet::new(); - let mut service_status_cache = std::collections::HashMap::new(); - - // Parse D-Bus response for services only - for unit in units { - let (unit_name, _description, load_state, active_state, sub_state, _followed, _unit_path, _job_id, _job_type, _job_path) = unit; - - if unit_name.ends_with(".service") { - let service_name = unit_name.trim_end_matches(".service"); + for line in unit_files_str.lines() { + let fields: Vec<&str> = line.split_whitespace().collect(); + if fields.len() >= 2 && fields[0].ends_with(".service") { + let service_name = fields[0].trim_end_matches(".service"); all_service_names.insert(service_name.to_string()); + } + } - service_status_cache.insert(service_name.to_string(), ServiceStatusInfo { - load_state: load_state.clone(), - active_state: active_state.clone(), - sub_state: sub_state.clone(), + // Parse runtime status for all units + let mut status_cache = std::collections::HashMap::new(); + for line in units_status_str.lines() { + let fields: Vec<&str> = line.split_whitespace().collect(); + if fields.len() >= 4 && fields[0].ends_with(".service") { + let service_name = fields[0].trim_end_matches(".service"); + let load_state = fields.get(1).unwrap_or(&"unknown").to_string(); + let active_state = fields.get(2).unwrap_or(&"unknown").to_string(); + let sub_state = fields.get(3).unwrap_or(&"unknown").to_string(); + + status_cache.insert(service_name.to_string(), ServiceStatusInfo { + load_state, + active_state, + sub_state, }); } } - let mut services = Vec::new(); - let excluded_services = &self.config.excluded_services; - let service_name_filters = &self.config.service_name_filters; + // For services found in unit files but not in runtime status, set default inactive status + for service_name in &all_service_names { + if !status_cache.contains_key(service_name) { + status_cache.insert(service_name.to_string(), ServiceStatusInfo { + load_state: "not-loaded".to_string(), + active_state: "inactive".to_string(), + sub_state: "dead".to_string(), + }); + } + } // Process all discovered services and apply filters for service_name in &all_service_names { @@ -327,7 +353,7 @@ impl SystemdCollector { } } - Ok((services, service_status_cache)) + Ok((services, status_cache)) } /// Get service status from D-Bus cache diff --git a/dashboard/Cargo.toml b/dashboard/Cargo.toml index bd441f2..16f49f6 100644 --- a/dashboard/Cargo.toml +++ b/dashboard/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard" -version = "0.1.198" +version = "0.1.199" edition = "2021" [dependencies] diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 708b362..af20240 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard-shared" -version = "0.1.198" +version = "0.1.199" edition = "2021" [dependencies]