Fix RwLock deadlock in systemd collector Phase 4

- Restructure get_monitored_services to avoid nested write locks
- Split discover_services into discover_services_internal that returns data
- Update state in separate scope to prevent deadlock
- Fix borrow checker errors with clone() for status cache
This commit is contained in:
Christoffer Martinsson 2025-10-23 13:12:53 +02:00
parent 7607e971b8
commit 0f12438ab4
2 changed files with 49 additions and 28 deletions

17
TODO.md
View File

@ -10,6 +10,22 @@
## System panel (agent/dashboard)
Use the following layout:
'''
NixOS:
Version: xxxxxxxxxx
Active users: cm, simon
CPU:
● Load: 0.02 0.31 0.86 • 3000.2 MHz
RAM:
● Usage: 33% 2.6GB/7.6GB
● /tmp: 0.0% 0B/2.0GB
Storage:
● root (Single):
├─ ● nvme0n1 W:4%
└─ ● 8.3% 75.0GB/906.2GB
'''
- Add support for showing logged-in users
- Add support to show timestamp/version for latest nixos rebuild
@ -24,3 +40,4 @@
- Add lower status bar with dynamically updated shortcuts when switching between panels
- Add support for sending a command via the dashboard to the agent to do a NixOS rebuild
- Add support for navigating services in the dashboard and triggering start/stop/restart
- Add support for triggering a backup

View File

@ -63,28 +63,35 @@ impl SystemdCollector {
/// Get monitored services, discovering them if needed or cache is expired
fn get_monitored_services(&self) -> Result<Vec<String>> {
let mut state = self.state.write().unwrap();
// Check if we need to discover services
let needs_discovery = match state.last_discovery_time {
None => true, // First time
Some(last_time) => {
let elapsed = last_time.elapsed().as_secs();
elapsed >= state.discovery_interval_seconds
// Check if we need discovery without holding the lock
let needs_discovery = {
let state = self.state.read().unwrap();
match state.last_discovery_time {
None => true, // First time
Some(last_time) => {
let elapsed = last_time.elapsed().as_secs();
elapsed >= state.discovery_interval_seconds
}
}
};
if needs_discovery {
debug!("Discovering systemd services (cache expired or first run)");
match self.discover_services() {
Ok(services) => {
state.monitored_services = services;
state.last_discovery_time = Some(Instant::now());
debug!(
"Auto-discovered {} services to monitor: {:?}",
state.monitored_services.len(),
state.monitored_services
);
// Call discover_services_internal which doesn't update state
match self.discover_services_internal() {
Ok((services, status_cache)) => {
// Update state with discovered services in a separate scope
if let Ok(mut state) = self.state.write() {
state.monitored_services = services.clone();
state.service_status_cache = status_cache;
state.last_discovery_time = Some(Instant::now());
debug!(
"Auto-discovered {} services to monitor: {:?}",
state.monitored_services.len(),
state.monitored_services
);
return Ok(services);
}
}
Err(e) => {
debug!("Failed to discover services, using cached list: {}", e);
@ -93,6 +100,8 @@ impl SystemdCollector {
}
}
// Return cached services
let state = self.state.read().unwrap();
Ok(state.monitored_services.clone())
}
@ -125,8 +134,8 @@ impl SystemdCollector {
state.nginx_site_metrics.clone()
}
/// Auto-discover interesting services to monitor
fn discover_services(&self) -> Result<Vec<String>> {
/// Auto-discover interesting services to monitor (internal version that doesn't update state)
fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
debug!("Starting systemd service discovery with status caching");
// Get all services (includes inactive, running, failed - everything)
let units_output = Command::new("systemctl")
@ -169,9 +178,9 @@ impl SystemdCollector {
// Cache the status information
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
load_state,
active_state,
sub_state,
load_state: load_state.clone(),
active_state: active_state.clone(),
sub_state: sub_state.clone(),
description,
});
@ -216,17 +225,12 @@ impl SystemdCollector {
}
}
// Update the cache with all discovered service status information
if let Ok(mut state) = self.state.write() {
state.service_status_cache = status_cache;
}
debug!("Service discovery completed: found {} matching services: {:?}", services.len(), services);
if services.is_empty() {
debug!("No services found matching the configured filters - this may indicate a parsing issue");
}
Ok(services)
Ok((services, status_cache))
}
/// Check if service name matches pattern (supports wildcards like nginx*)