Fix RwLock deadlock in systemd collector Phase 4
- Restructure get_monitored_services to avoid nested write locks - Split discover_services into discover_services_internal that returns data - Update state in separate scope to prevent deadlock - Fix borrow checker errors with clone() for status cache
This commit is contained in:
parent
7607e971b8
commit
0f12438ab4
17
TODO.md
17
TODO.md
@ -10,6 +10,22 @@
|
|||||||
|
|
||||||
## System panel (agent/dashboard)
|
## System panel (agent/dashboard)
|
||||||
|
|
||||||
|
Use the following layout:
|
||||||
|
'''
|
||||||
|
NixOS:
|
||||||
|
Version: xxxxxxxxxx
|
||||||
|
Active users: cm, simon
|
||||||
|
CPU:
|
||||||
|
● Load: 0.02 0.31 0.86 • 3000.2 MHz
|
||||||
|
RAM:
|
||||||
|
● Usage: 33% 2.6GB/7.6GB
|
||||||
|
● /tmp: 0.0% 0B/2.0GB
|
||||||
|
Storage:
|
||||||
|
● root (Single):
|
||||||
|
├─ ● nvme0n1 W:4%
|
||||||
|
└─ ● 8.3% 75.0GB/906.2GB
|
||||||
|
'''
|
||||||
|
|
||||||
- Add support to show login users
|
- Add support to show login users
|
||||||
- Add support to show timestamp/version for latest nixos rebuild
|
- Add support to show timestamp/version for latest nixos rebuild
|
||||||
|
|
||||||
@ -24,3 +40,4 @@
|
|||||||
- Add lower statusbar with dynamically updated shortcuts when switching between panels
|
- Add lower statusbar with dynamically updated shortcuts when switching between panels
|
||||||
- Add support for sending a command via the dashboard to the agent to do a NixOS rebuild
|
- Add support for sending a command via the dashboard to the agent to do a NixOS rebuild
|
||||||
- Add support for navigating services in dashboard and trigger start/stop/restart
|
- Add support for navigating services in dashboard and trigger start/stop/restart
|
||||||
|
- Add support for triggering a backup
|
||||||
|
|||||||
@ -63,28 +63,35 @@ impl SystemdCollector {
|
|||||||
|
|
||||||
/// Get monitored services, discovering them if needed or cache is expired
|
/// Get monitored services, discovering them if needed or cache is expired
|
||||||
fn get_monitored_services(&self) -> Result<Vec<String>> {
|
fn get_monitored_services(&self) -> Result<Vec<String>> {
|
||||||
let mut state = self.state.write().unwrap();
|
// Check if we need discovery under a short-lived read lock (released before discovery runs)
|
||||||
|
let needs_discovery = {
|
||||||
// Check if we need to discover services
|
let state = self.state.read().unwrap();
|
||||||
let needs_discovery = match state.last_discovery_time {
|
match state.last_discovery_time {
|
||||||
None => true, // First time
|
None => true, // First time
|
||||||
Some(last_time) => {
|
Some(last_time) => {
|
||||||
let elapsed = last_time.elapsed().as_secs();
|
let elapsed = last_time.elapsed().as_secs();
|
||||||
elapsed >= state.discovery_interval_seconds
|
elapsed >= state.discovery_interval_seconds
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if needs_discovery {
|
if needs_discovery {
|
||||||
debug!("Discovering systemd services (cache expired or first run)");
|
debug!("Discovering systemd services (cache expired or first run)");
|
||||||
match self.discover_services() {
|
// Call discover_services_internal which doesn't update state
|
||||||
Ok(services) => {
|
match self.discover_services_internal() {
|
||||||
state.monitored_services = services;
|
Ok((services, status_cache)) => {
|
||||||
state.last_discovery_time = Some(Instant::now());
|
// Update state with discovered services in a separate scope
|
||||||
debug!(
|
if let Ok(mut state) = self.state.write() {
|
||||||
"Auto-discovered {} services to monitor: {:?}",
|
state.monitored_services = services.clone();
|
||||||
state.monitored_services.len(),
|
state.service_status_cache = status_cache;
|
||||||
state.monitored_services
|
state.last_discovery_time = Some(Instant::now());
|
||||||
);
|
debug!(
|
||||||
|
"Auto-discovered {} services to monitor: {:?}",
|
||||||
|
state.monitored_services.len(),
|
||||||
|
state.monitored_services
|
||||||
|
);
|
||||||
|
return Ok(services);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
debug!("Failed to discover services, using cached list: {}", e);
|
debug!("Failed to discover services, using cached list: {}", e);
|
||||||
@ -93,6 +100,8 @@ impl SystemdCollector {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return cached services
|
||||||
|
let state = self.state.read().unwrap();
|
||||||
Ok(state.monitored_services.clone())
|
Ok(state.monitored_services.clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -125,8 +134,8 @@ impl SystemdCollector {
|
|||||||
state.nginx_site_metrics.clone()
|
state.nginx_site_metrics.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Auto-discover interesting services to monitor
|
/// Auto-discover interesting services to monitor (internal version that doesn't update state)
|
||||||
fn discover_services(&self) -> Result<Vec<String>> {
|
fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
|
||||||
debug!("Starting systemd service discovery with status caching");
|
debug!("Starting systemd service discovery with status caching");
|
||||||
// Get all services (includes inactive, running, failed - everything)
|
// Get all services (includes inactive, running, failed - everything)
|
||||||
let units_output = Command::new("systemctl")
|
let units_output = Command::new("systemctl")
|
||||||
@ -169,9 +178,9 @@ impl SystemdCollector {
|
|||||||
|
|
||||||
// Cache the status information
|
// Cache the status information
|
||||||
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||||
load_state,
|
load_state: load_state.clone(),
|
||||||
active_state,
|
active_state: active_state.clone(),
|
||||||
sub_state,
|
sub_state: sub_state.clone(),
|
||||||
description,
|
description,
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -216,17 +225,12 @@ impl SystemdCollector {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update the cache with all discovered service status information
|
|
||||||
if let Ok(mut state) = self.state.write() {
|
|
||||||
state.service_status_cache = status_cache;
|
|
||||||
}
|
|
||||||
|
|
||||||
debug!("Service discovery completed: found {} matching services: {:?}", services.len(), services);
|
debug!("Service discovery completed: found {} matching services: {:?}", services.len(), services);
|
||||||
if services.is_empty() {
|
if services.is_empty() {
|
||||||
debug!("No services found matching the configured filters - this may indicate a parsing issue");
|
debug!("No services found matching the configured filters - this may indicate a parsing issue");
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(services)
|
Ok((services, status_cache))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if service name matches pattern (supports wildcards like nginx*)
|
/// Check if service name matches pattern (supports wildcards like nginx*)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user