Fix systemd collector crashes and improve error handling

Prevent silent crashes when acquiring the RwLock write lock fails (for
example, because the lock is poisoned) by replacing unwrap() calls with
proper error handling. The systemd collector now gracefully handles the
case where no services are found (a valid state) instead of crashing.

- Replace unwrap() with proper error handling in get_monitored_services()
- Replace unwrap() in get_nginx_site_metrics() with a fallback that returns an empty metric list
- Add debug logging for empty service discovery results
- Ensure collector completes successfully even with 0 services found

This fixes cmbox disappearing from the dashboard when service discovery fails.
This commit is contained in:
Christoffer Martinsson 2025-10-23 11:43:41 +02:00
parent 5babf9a04e
commit 29d371f1a8

View File

@@ -56,7 +56,13 @@ impl SystemdCollector {
 /// Get monitored services, discovering them if needed or cache is expired
 fn get_monitored_services(&self) -> Result<Vec<String>> {
-let mut state = self.state.write().unwrap();
+let mut state = match self.state.write() {
+Ok(state) => state,
+Err(e) => {
+debug!("Failed to acquire write lock on systemd state: {}", e);
+return Ok(Vec::new()); // Return empty list instead of crashing
+}
+};
 // Check if we need to discover services
 let needs_discovery = match state.last_discovery_time {
@@ -91,7 +97,13 @@ impl SystemdCollector {
 /// Get nginx site metrics, checking them if cache is expired
 fn get_nginx_site_metrics(&self) -> Vec<Metric> {
-let mut state = self.state.write().unwrap();
+let mut state = match self.state.write() {
+Ok(state) => state,
+Err(e) => {
+debug!("Failed to acquire write lock for nginx metrics: {}", e);
+return Vec::new(); // Return empty list instead of crashing
+}
+};
 // Check if we need to refresh nginx site metrics
 let needs_refresh = match state.last_nginx_check_time {
@@ -201,6 +213,11 @@ impl SystemdCollector {
state.service_status_cache = status_cache;
}
debug!("Service discovery completed: found {} matching services", services.len());
if services.is_empty() {
debug!("No services found matching the configured filters - this is a valid state");
}
Ok(services)
}