Fix agent hang by reverting service discovery to systemctl
All checks were successful
Build and Release / build-and-release (push) Successful in 1m31s
All checks were successful
Build and Release / build-and-release (push) Successful in 1m31s
The D-Bus ListUnits call in discover_services_internal() was causing the agent to hang on startup.

**Root cause:**
- D-Bus ListUnits call with complex tuple destructuring hung indefinitely
- Agent never completed first collection cycle
- No collector output in logs

**Fix:**
- Revert discover_services_internal() to use systemctl list-units/list-unit-files
- Keep D-Bus-based property queries (WorkingDirectory, MemoryCurrent, ExecStart)
- Hybrid approach: systemctl for discovery, D-Bus for individual queries

**External commands still used:**
- systemctl list-units, list-unit-files (service discovery)
- smartctl (SMART data)
- sudo du (directory sizes)
- nginx -T (config fallback)

Version bump: 0.1.198 → 0.1.199
This commit is contained in:
parent
7ad149bbe4
commit
eab3f17428
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -493,7 +493,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.198"
|
||||
version = "0.1.199"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@ -515,7 +515,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.198"
|
||||
version = "0.1.199"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@ -545,7 +545,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.198"
|
||||
version = "0.1.199"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"serde",
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.198"
|
||||
version = "0.1.199"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@ -263,45 +263,71 @@ impl SystemdCollector {
|
||||
state.nginx_site_metrics.clone()
|
||||
}
|
||||
|
||||
/// Auto-discover interesting services to monitor using D-Bus
|
||||
/// Auto-discover interesting services to monitor using systemctl
|
||||
async fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
|
||||
// Connect to system D-Bus
|
||||
let connection = Connection::system().await?;
|
||||
// First: Get all service unit files (with 3 second timeout)
|
||||
let unit_files_output = Command::new("timeout")
|
||||
.args(&["3", "systemctl", "list-unit-files", "--type=service", "--no-pager", "--plain"])
|
||||
.output()?;
|
||||
|
||||
// Get systemd manager proxy
|
||||
let proxy = zbus::Proxy::new(
|
||||
&connection,
|
||||
"org.freedesktop.systemd1",
|
||||
"/org/freedesktop/systemd1",
|
||||
"org.freedesktop.systemd1.Manager",
|
||||
).await?;
|
||||
if !unit_files_output.status.success() {
|
||||
return Err(anyhow::anyhow!("systemctl list-unit-files command failed"));
|
||||
}
|
||||
|
||||
// List all units via D-Bus
|
||||
let units: Vec<(String, String, String, String, String, String, zbus::zvariant::OwnedObjectPath, u32, String, zbus::zvariant::OwnedObjectPath)> =
|
||||
proxy.call("ListUnits", &()).await?;
|
||||
// Second: Get runtime status of all units (with 3 second timeout)
|
||||
let units_status_output = Command::new("timeout")
|
||||
.args(&["3", "systemctl", "list-units", "--type=service", "--all", "--no-pager", "--plain"])
|
||||
.output()?;
|
||||
|
||||
if !units_status_output.status.success() {
|
||||
return Err(anyhow::anyhow!("systemctl list-units command failed"));
|
||||
}
|
||||
|
||||
let unit_files_str = String::from_utf8(unit_files_output.stdout)?;
|
||||
let units_status_str = String::from_utf8(units_status_output.stdout)?;
|
||||
let mut services = Vec::new();
|
||||
|
||||
let excluded_services = &self.config.excluded_services;
|
||||
let service_name_filters = &self.config.service_name_filters;
|
||||
|
||||
// Parse all service unit files
|
||||
let mut all_service_names = std::collections::HashSet::new();
|
||||
let mut service_status_cache = std::collections::HashMap::new();
|
||||
|
||||
// Parse D-Bus response for services only
|
||||
for unit in units {
|
||||
let (unit_name, _description, load_state, active_state, sub_state, _followed, _unit_path, _job_id, _job_type, _job_path) = unit;
|
||||
|
||||
if unit_name.ends_with(".service") {
|
||||
let service_name = unit_name.trim_end_matches(".service");
|
||||
for line in unit_files_str.lines() {
|
||||
let fields: Vec<&str> = line.split_whitespace().collect();
|
||||
if fields.len() >= 2 && fields[0].ends_with(".service") {
|
||||
let service_name = fields[0].trim_end_matches(".service");
|
||||
all_service_names.insert(service_name.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
service_status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||
load_state: load_state.clone(),
|
||||
active_state: active_state.clone(),
|
||||
sub_state: sub_state.clone(),
|
||||
// Parse runtime status for all units
|
||||
let mut status_cache = std::collections::HashMap::new();
|
||||
for line in units_status_str.lines() {
|
||||
let fields: Vec<&str> = line.split_whitespace().collect();
|
||||
if fields.len() >= 4 && fields[0].ends_with(".service") {
|
||||
let service_name = fields[0].trim_end_matches(".service");
|
||||
let load_state = fields.get(1).unwrap_or(&"unknown").to_string();
|
||||
let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
|
||||
let sub_state = fields.get(3).unwrap_or(&"unknown").to_string();
|
||||
|
||||
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||
load_state,
|
||||
active_state,
|
||||
sub_state,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut services = Vec::new();
|
||||
let excluded_services = &self.config.excluded_services;
|
||||
let service_name_filters = &self.config.service_name_filters;
|
||||
// For services found in unit files but not in runtime status, set default inactive status
|
||||
for service_name in &all_service_names {
|
||||
if !status_cache.contains_key(service_name) {
|
||||
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||
load_state: "not-loaded".to_string(),
|
||||
active_state: "inactive".to_string(),
|
||||
sub_state: "dead".to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Process all discovered services and apply filters
|
||||
for service_name in &all_service_names {
|
||||
@ -327,7 +353,7 @@ impl SystemdCollector {
|
||||
}
|
||||
}
|
||||
|
||||
Ok((services, service_status_cache))
|
||||
Ok((services, status_cache))
|
||||
}
|
||||
|
||||
/// Get service status from D-Bus cache
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.198"
|
||||
version = "0.1.199"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.198"
|
||||
version = "0.1.199"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user