Fix agent hang by reverting service discovery to systemctl
All checks were successful
Build and Release / build-and-release (push) Successful in 1m31s
All checks were successful
Build and Release / build-and-release (push) Successful in 1m31s
The D-Bus ListUnits call in discover_services_internal() was causing the agent to hang on startup.

**Root cause:**
- The D-Bus ListUnits call (with complex tuple destructuring) hung indefinitely
- The agent never completed its first collection cycle
- No collector output appeared in the logs

**Fix:**
- Revert discover_services_internal() to use systemctl list-units/list-unit-files
- Keep D-Bus-based property queries (WorkingDirectory, MemoryCurrent, ExecStart)
- Hybrid approach: systemctl for discovery, D-Bus for individual queries

**External commands still used:**
- systemctl list-units, list-unit-files (service discovery)
- smartctl (SMART data)
- sudo du (directory sizes)
- nginx -T (config fallback)

Version bump: 0.1.198 → 0.1.199
This commit is contained in:
parent
7ad149bbe4
commit
eab3f17428
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -493,7 +493,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard"
|
name = "cm-dashboard"
|
||||||
version = "0.1.198"
|
version = "0.1.199"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"chrono",
|
"chrono",
|
||||||
@ -515,7 +515,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard-agent"
|
name = "cm-dashboard-agent"
|
||||||
version = "0.1.198"
|
version = "0.1.199"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
@ -545,7 +545,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard-shared"
|
name = "cm-dashboard-shared"
|
||||||
version = "0.1.198"
|
version = "0.1.199"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"chrono",
|
"chrono",
|
||||||
"serde",
|
"serde",
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard-agent"
|
name = "cm-dashboard-agent"
|
||||||
version = "0.1.198"
|
version = "0.1.199"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
@ -263,45 +263,71 @@ impl SystemdCollector {
|
|||||||
state.nginx_site_metrics.clone()
|
state.nginx_site_metrics.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Auto-discover interesting services to monitor using D-Bus
|
/// Auto-discover interesting services to monitor using systemctl
|
||||||
async fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
|
async fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
|
||||||
// Connect to system D-Bus
|
// First: Get all service unit files (with 3 second timeout)
|
||||||
let connection = Connection::system().await?;
|
let unit_files_output = Command::new("timeout")
|
||||||
|
.args(&["3", "systemctl", "list-unit-files", "--type=service", "--no-pager", "--plain"])
|
||||||
|
.output()?;
|
||||||
|
|
||||||
// Get systemd manager proxy
|
if !unit_files_output.status.success() {
|
||||||
let proxy = zbus::Proxy::new(
|
return Err(anyhow::anyhow!("systemctl list-unit-files command failed"));
|
||||||
&connection,
|
}
|
||||||
"org.freedesktop.systemd1",
|
|
||||||
"/org/freedesktop/systemd1",
|
|
||||||
"org.freedesktop.systemd1.Manager",
|
|
||||||
).await?;
|
|
||||||
|
|
||||||
// List all units via D-Bus
|
// Second: Get runtime status of all units (with 3 second timeout)
|
||||||
let units: Vec<(String, String, String, String, String, String, zbus::zvariant::OwnedObjectPath, u32, String, zbus::zvariant::OwnedObjectPath)> =
|
let units_status_output = Command::new("timeout")
|
||||||
proxy.call("ListUnits", &()).await?;
|
.args(&["3", "systemctl", "list-units", "--type=service", "--all", "--no-pager", "--plain"])
|
||||||
|
.output()?;
|
||||||
|
|
||||||
|
if !units_status_output.status.success() {
|
||||||
|
return Err(anyhow::anyhow!("systemctl list-units command failed"));
|
||||||
|
}
|
||||||
|
|
||||||
|
let unit_files_str = String::from_utf8(unit_files_output.stdout)?;
|
||||||
|
let units_status_str = String::from_utf8(units_status_output.stdout)?;
|
||||||
|
let mut services = Vec::new();
|
||||||
|
|
||||||
|
let excluded_services = &self.config.excluded_services;
|
||||||
|
let service_name_filters = &self.config.service_name_filters;
|
||||||
|
|
||||||
|
// Parse all service unit files
|
||||||
let mut all_service_names = std::collections::HashSet::new();
|
let mut all_service_names = std::collections::HashSet::new();
|
||||||
let mut service_status_cache = std::collections::HashMap::new();
|
for line in unit_files_str.lines() {
|
||||||
|
let fields: Vec<&str> = line.split_whitespace().collect();
|
||||||
// Parse D-Bus response for services only
|
if fields.len() >= 2 && fields[0].ends_with(".service") {
|
||||||
for unit in units {
|
let service_name = fields[0].trim_end_matches(".service");
|
||||||
let (unit_name, _description, load_state, active_state, sub_state, _followed, _unit_path, _job_id, _job_type, _job_path) = unit;
|
|
||||||
|
|
||||||
if unit_name.ends_with(".service") {
|
|
||||||
let service_name = unit_name.trim_end_matches(".service");
|
|
||||||
all_service_names.insert(service_name.to_string());
|
all_service_names.insert(service_name.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
service_status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
// Parse runtime status for all units
|
||||||
load_state: load_state.clone(),
|
let mut status_cache = std::collections::HashMap::new();
|
||||||
active_state: active_state.clone(),
|
for line in units_status_str.lines() {
|
||||||
sub_state: sub_state.clone(),
|
let fields: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if fields.len() >= 4 && fields[0].ends_with(".service") {
|
||||||
|
let service_name = fields[0].trim_end_matches(".service");
|
||||||
|
let load_state = fields.get(1).unwrap_or(&"unknown").to_string();
|
||||||
|
let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
|
||||||
|
let sub_state = fields.get(3).unwrap_or(&"unknown").to_string();
|
||||||
|
|
||||||
|
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||||
|
load_state,
|
||||||
|
active_state,
|
||||||
|
sub_state,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut services = Vec::new();
|
// For services found in unit files but not in runtime status, set default inactive status
|
||||||
let excluded_services = &self.config.excluded_services;
|
for service_name in &all_service_names {
|
||||||
let service_name_filters = &self.config.service_name_filters;
|
if !status_cache.contains_key(service_name) {
|
||||||
|
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||||
|
load_state: "not-loaded".to_string(),
|
||||||
|
active_state: "inactive".to_string(),
|
||||||
|
sub_state: "dead".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Process all discovered services and apply filters
|
// Process all discovered services and apply filters
|
||||||
for service_name in &all_service_names {
|
for service_name in &all_service_names {
|
||||||
@ -327,7 +353,7 @@ impl SystemdCollector {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((services, service_status_cache))
|
Ok((services, status_cache))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get service status from D-Bus cache
|
/// Get service status from D-Bus cache
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard"
|
name = "cm-dashboard"
|
||||||
version = "0.1.198"
|
version = "0.1.199"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard-shared"
|
name = "cm-dashboard-shared"
|
||||||
version = "0.1.198"
|
version = "0.1.199"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user