From 355a986582eb07017dd78525edd6630da7e31c50 Mon Sep 17 00:00:00 2001 From: Christoffer Martinsson Date: Tue, 14 Oct 2025 20:53:07 +0200 Subject: [PATCH] Fix nginx site monitoring to properly detect errors - Return error status for HTTP 502/5xx responses instead of success - Show 'error' description for sites with connectivity but wrong status codes - Show 'unreachable' description for complete connection failures - Each nginx site now has independent status based on actual health - Sites with timeouts or server errors will trigger notifications --- agent/src/collectors/service.rs | 35 +++++++++++++++++---------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/agent/src/collectors/service.rs b/agent/src/collectors/service.rs index d816272..c7cf12b 100644 --- a/agent/src/collectors/service.rs +++ b/agent/src/collectors/service.rs @@ -832,7 +832,8 @@ impl ServiceCollector { std::env::var("UID").unwrap_or_default() == "0" } - async fn measure_site_latency(&self, site_name: &str) -> Option<f32> { + async fn measure_site_latency(&self, site_name: &str) -> (Option<f32>, bool) { + // Returns (latency, is_healthy) // Construct URL from site name let url = if site_name.contains("localhost") || site_name.contains("127.0.0.1") { format!("http://{}", site_name) @@ -841,10 +842,13 @@ impl ServiceCollector { }; // Create HTTP client with short timeout - let client = reqwest::Client::builder() + let client = match reqwest::Client::builder() .timeout(Duration::from_secs(5)) .build() - .ok()?; + { + Ok(client) => client, + Err(_) => return (None, false), + }; let start = Instant::now(); @@ -852,16 +856,12 @@ impl ServiceCollector { match client.head(&url).send().await { Ok(response) => { let latency = start.elapsed().as_millis() as f32; - if response.status().is_success() || response.status().is_redirection() { - Some(latency) - } else { - // Site is reachable but returned error, still measure latency - Some(latency) - } + let is_healthy = response.status().is_success() 
|| response.status().is_redirection(); + (Some(latency), is_healthy) } Err(_) => { - // Connection failed, no latency measurement - None + // Connection failed, no latency measurement, not healthy + (None, false) } } } @@ -1355,13 +1355,14 @@ impl Collector for ServiceCollector { // Add nginx sites as individual sub-services if let Some(sites) = self.get_nginx_sites().await { for site in sites.iter() { - // Measure latency for this site - let latency = self.measure_site_latency(site).await; + // Measure latency and health for this site + let (latency, is_healthy) = self.measure_site_latency(site).await; - // Determine status and description based on latency measurement - let (site_status, site_description) = match latency { - Some(_ms) => (ServiceStatus::Running, None), - None => (ServiceStatus::Stopped, Some(vec!["unreachable".to_string()])), + // Determine status and description based on latency and health + let (site_status, site_description) = match (latency, is_healthy) { + (Some(_ms), true) => (ServiceStatus::Running, None), + (Some(_ms), false) => (ServiceStatus::Stopped, Some(vec!["error".to_string()])), + (None, _) => (ServiceStatus::Stopped, Some(vec!["unreachable".to_string()])), }; // Update counters based on site status