Fix RwLock deadlock in systemd collector Phase 4
- Restructure get_monitored_services to avoid nested write locks - Split discover_services into discover_services_internal that returns data - Update state in separate scope to prevent deadlock - Fix borrow checker errors with clone() for status cache
This commit is contained in:
parent
7607e971b8
commit
0f12438ab4
17
TODO.md
17
TODO.md
@ -10,6 +10,22 @@
|
||||
|
||||
## System panel (agent/dashboard)
|
||||
|
||||
Use the following layout:
|
||||
'''
|
||||
NixOS:
|
||||
Version: xxxxxxxxxx
|
||||
Active users: cm, simon
|
||||
CPU:
|
||||
● Load: 0.02 0.31 0.86 • 3000.2 MHz
|
||||
RAM:
|
||||
● Usage: 33% 2.6GB/7.6GB
|
||||
● /tmp: 0.0% 0B/2.0GB
|
||||
Storage:
|
||||
● root (Single):
|
||||
├─ ● nvme0n1 W:4%
|
||||
└─ ● 8.3% 75.0GB/906.2GB
|
||||
'''
|
||||
|
||||
- Add support for showing logged-in users
|
||||
- Add support for showing the timestamp/version of the latest NixOS rebuild
|
||||
|
||||
@ -24,3 +40,4 @@
|
||||
- Add a lower status bar with dynamically updated shortcuts when switching between panels
|
||||
- Add support for sending a command via the dashboard to the agent to do a NixOS rebuild
|
||||
- Add support for navigating services in the dashboard and triggering start/stop/restart
|
||||
- Add support for triggering a backup
|
||||
|
||||
@ -63,28 +63,35 @@ impl SystemdCollector {
|
||||
|
||||
/// Get monitored services, discovering them if needed or cache is expired
|
||||
fn get_monitored_services(&self) -> Result<Vec<String>> {
|
||||
let mut state = self.state.write().unwrap();
|
||||
|
||||
// Check if we need to discover services
|
||||
let needs_discovery = match state.last_discovery_time {
|
||||
None => true, // First time
|
||||
Some(last_time) => {
|
||||
let elapsed = last_time.elapsed().as_secs();
|
||||
elapsed >= state.discovery_interval_seconds
|
||||
// Check whether discovery is needed under a short-lived read lock, so no write lock is held while discovery runs
|
||||
let needs_discovery = {
|
||||
let state = self.state.read().unwrap();
|
||||
match state.last_discovery_time {
|
||||
None => true, // First time
|
||||
Some(last_time) => {
|
||||
let elapsed = last_time.elapsed().as_secs();
|
||||
elapsed >= state.discovery_interval_seconds
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if needs_discovery {
|
||||
debug!("Discovering systemd services (cache expired or first run)");
|
||||
match self.discover_services() {
|
||||
Ok(services) => {
|
||||
state.monitored_services = services;
|
||||
state.last_discovery_time = Some(Instant::now());
|
||||
debug!(
|
||||
"Auto-discovered {} services to monitor: {:?}",
|
||||
state.monitored_services.len(),
|
||||
state.monitored_services
|
||||
);
|
||||
// Call discover_services_internal which doesn't update state
|
||||
match self.discover_services_internal() {
|
||||
Ok((services, status_cache)) => {
|
||||
// Update state with discovered services in a separate scope
|
||||
if let Ok(mut state) = self.state.write() {
|
||||
state.monitored_services = services.clone();
|
||||
state.service_status_cache = status_cache;
|
||||
state.last_discovery_time = Some(Instant::now());
|
||||
debug!(
|
||||
"Auto-discovered {} services to monitor: {:?}",
|
||||
state.monitored_services.len(),
|
||||
state.monitored_services
|
||||
);
|
||||
return Ok(services);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("Failed to discover services, using cached list: {}", e);
|
||||
@ -93,6 +100,8 @@ impl SystemdCollector {
|
||||
}
|
||||
}
|
||||
|
||||
// Return cached services
|
||||
let state = self.state.read().unwrap();
|
||||
Ok(state.monitored_services.clone())
|
||||
}
|
||||
|
||||
@ -125,8 +134,8 @@ impl SystemdCollector {
|
||||
state.nginx_site_metrics.clone()
|
||||
}
|
||||
|
||||
/// Auto-discover interesting services to monitor
|
||||
fn discover_services(&self) -> Result<Vec<String>> {
|
||||
/// Auto-discover interesting services to monitor (internal version that doesn't update state)
|
||||
fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
|
||||
debug!("Starting systemd service discovery with status caching");
|
||||
// Get all services (includes inactive, running, failed - everything)
|
||||
let units_output = Command::new("systemctl")
|
||||
@ -169,9 +178,9 @@ impl SystemdCollector {
|
||||
|
||||
// Cache the status information
|
||||
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||
load_state,
|
||||
active_state,
|
||||
sub_state,
|
||||
load_state: load_state.clone(),
|
||||
active_state: active_state.clone(),
|
||||
sub_state: sub_state.clone(),
|
||||
description,
|
||||
});
|
||||
|
||||
@ -216,17 +225,12 @@ impl SystemdCollector {
|
||||
}
|
||||
}
|
||||
|
||||
// Update the cache with all discovered service status information
|
||||
if let Ok(mut state) = self.state.write() {
|
||||
state.service_status_cache = status_cache;
|
||||
}
|
||||
|
||||
debug!("Service discovery completed: found {} matching services: {:?}", services.len(), services);
|
||||
if services.is_empty() {
|
||||
debug!("No services found matching the configured filters - this may indicate a parsing issue");
|
||||
}
|
||||
|
||||
Ok(services)
|
||||
Ok((services, status_cache))
|
||||
}
|
||||
|
||||
/// Check if service name matches pattern (supports wildcards like nginx*)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user