Fix RwLock deadlock in systemd collector Phase 4

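- Restructure get_monitored_services so the write lock is no longer held while discovery runs (avoids nested write locks)
- Replace discover_services with discover_services_internal, which returns the service list and status cache instead of writing them to state
- Update state in a separate, short-lived write-lock scope to prevent the deadlock
- Fix borrow checker errors by cloning load_state, active_state and sub_state when inserting into the status cache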
This commit is contained in:
2025-10-23 13:12:53 +02:00
parent 7607e971b8
commit 0f12438ab4
2 changed files with 49 additions and 28 deletions

View File

@@ -63,28 +63,35 @@ impl SystemdCollector {
/// Get monitored services, discovering them if needed or cache is expired
fn get_monitored_services(&self) -> Result<Vec<String>> {
let mut state = self.state.write().unwrap();
// Check if we need to discover services
let needs_discovery = match state.last_discovery_time {
None => true, // First time
Some(last_time) => {
let elapsed = last_time.elapsed().as_secs();
elapsed >= state.discovery_interval_seconds
// Check if we need discovery under a short-lived read lock that is released before discovery runs
let needs_discovery = {
let state = self.state.read().unwrap();
match state.last_discovery_time {
None => true, // First time
Some(last_time) => {
let elapsed = last_time.elapsed().as_secs();
elapsed >= state.discovery_interval_seconds
}
}
};
if needs_discovery {
debug!("Discovering systemd services (cache expired or first run)");
match self.discover_services() {
Ok(services) => {
state.monitored_services = services;
state.last_discovery_time = Some(Instant::now());
debug!(
"Auto-discovered {} services to monitor: {:?}",
state.monitored_services.len(),
state.monitored_services
);
// Call discover_services_internal which doesn't update state
match self.discover_services_internal() {
Ok((services, status_cache)) => {
// Update state with discovered services in a separate scope
if let Ok(mut state) = self.state.write() {
state.monitored_services = services.clone();
state.service_status_cache = status_cache;
state.last_discovery_time = Some(Instant::now());
debug!(
"Auto-discovered {} services to monitor: {:?}",
state.monitored_services.len(),
state.monitored_services
);
return Ok(services);
}
}
Err(e) => {
debug!("Failed to discover services, using cached list: {}", e);
@@ -93,6 +100,8 @@ impl SystemdCollector {
}
}
// Return cached services
let state = self.state.read().unwrap();
Ok(state.monitored_services.clone())
}
@@ -125,8 +134,8 @@ impl SystemdCollector {
state.nginx_site_metrics.clone()
}
/// Auto-discover interesting services to monitor
fn discover_services(&self) -> Result<Vec<String>> {
/// Auto-discover interesting services to monitor (internal version that doesn't update state)
fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
debug!("Starting systemd service discovery with status caching");
// Get all services (includes inactive, running, failed - everything)
let units_output = Command::new("systemctl")
@@ -169,9 +178,9 @@ impl SystemdCollector {
// Cache the status information
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
load_state,
active_state,
sub_state,
load_state: load_state.clone(),
active_state: active_state.clone(),
sub_state: sub_state.clone(),
description,
});
@@ -216,17 +225,12 @@ impl SystemdCollector {
}
}
// Update the cache with all discovered service status information
if let Ok(mut state) = self.state.write() {
state.service_status_cache = status_cache;
}
debug!("Service discovery completed: found {} matching services: {:?}", services.len(), services);
if services.is_empty() {
debug!("No services found matching the configured filters - this may indicate a parsing issue");
}
Ok(services)
Ok((services, status_cache))
}
/// Check if service name matches pattern (supports wildcards like nginx*)