From 29d371f1a8f1dd3f8f023c2603f23e31a3c6cbeb Mon Sep 17 00:00:00 2001 From: Christoffer Martinsson Date: Thu, 23 Oct 2025 11:43:41 +0200 Subject: [PATCH] Fix systemd collector crashes and improve error handling Prevent silent crashes when RwLock operations fail by replacing unwrap() calls with proper error handling. Systemd collector now gracefully handles cases where no services are found (valid state) instead of crashing. - Replace unwrap() with proper error handling in get_monitored_services() - Add crash prevention in get_nginx_site_metrics() - Add debug logging for empty service discovery results - Ensure collector completes successfully even with 0 services found This fixes cmbox disappearing from dashboard when service discovery fails. --- agent/src/collectors/systemd.rs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/agent/src/collectors/systemd.rs b/agent/src/collectors/systemd.rs index 4ff4ac0..88b47b3 100644 --- a/agent/src/collectors/systemd.rs +++ b/agent/src/collectors/systemd.rs @@ -56,7 +56,13 @@ impl SystemdCollector { /// Get monitored services, discovering them if needed or cache is expired fn get_monitored_services(&self) -> Result> { - let mut state = self.state.write().unwrap(); + let mut state = match self.state.write() { + Ok(state) => state, + Err(e) => { + debug!("Failed to acquire write lock on systemd state: {}", e); + return Ok(Vec::new()); // Return empty list instead of crashing + } + }; // Check if we need to discover services let needs_discovery = match state.last_discovery_time { @@ -91,7 +97,13 @@ impl SystemdCollector { /// Get nginx site metrics, checking them if cache is expired fn get_nginx_site_metrics(&self) -> Vec { - let mut state = self.state.write().unwrap(); + let mut state = match self.state.write() { + Ok(state) => state, + Err(e) => { + debug!("Failed to acquire write lock for nginx metrics: {}", e); + return Vec::new(); // Return empty list instead of crashing + } + }; // Check if we need to refresh nginx site metrics let needs_refresh = match state.last_nginx_check_time { @@ -201,6 +213,11 @@ impl SystemdCollector { state.service_status_cache = status_cache; } + debug!("Service discovery completed: found {} matching services", services.len()); + if services.is_empty() { + debug!("No services found matching the configured filters - this is a valid state"); + } + Ok(services) }