Fix nginx site monitoring to properly detect errors
- Return error status for HTTP 502/5xx responses instead of success
- Show 'error' description for sites with connectivity but wrong status codes
- Show 'unreachable' description for complete connection failures
- Each nginx site now has independent status based on actual health
- Sites with timeouts or server errors will trigger notifications
This commit is contained in:
parent
e64527ce2f
commit
355a986582
@ -832,7 +832,8 @@ impl ServiceCollector {
|
||||
std::env::var("UID").unwrap_or_default() == "0"
|
||||
}
|
||||
|
||||
async fn measure_site_latency(&self, site_name: &str) -> Option<f32> {
|
||||
async fn measure_site_latency(&self, site_name: &str) -> (Option<f32>, bool) {
|
||||
// Returns (latency, is_healthy)
|
||||
// Construct URL from site name
|
||||
let url = if site_name.contains("localhost") || site_name.contains("127.0.0.1") {
|
||||
format!("http://{}", site_name)
|
||||
@ -841,10 +842,13 @@ impl ServiceCollector {
|
||||
};
|
||||
|
||||
// Create HTTP client with short timeout
|
||||
let client = reqwest::Client::builder()
|
||||
let client = match reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(5))
|
||||
.build()
|
||||
.ok()?;
|
||||
{
|
||||
Ok(client) => client,
|
||||
Err(_) => return (None, false),
|
||||
};
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
@ -852,16 +856,12 @@ impl ServiceCollector {
|
||||
match client.head(&url).send().await {
|
||||
Ok(response) => {
|
||||
let latency = start.elapsed().as_millis() as f32;
|
||||
if response.status().is_success() || response.status().is_redirection() {
|
||||
Some(latency)
|
||||
} else {
|
||||
// Site is reachable but returned error, still measure latency
|
||||
Some(latency)
|
||||
}
|
||||
let is_healthy = response.status().is_success() || response.status().is_redirection();
|
||||
(Some(latency), is_healthy)
|
||||
}
|
||||
Err(_) => {
|
||||
// Connection failed, no latency measurement
|
||||
None
|
||||
// Connection failed, no latency measurement, not healthy
|
||||
(None, false)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1355,13 +1355,14 @@ impl Collector for ServiceCollector {
|
||||
// Add nginx sites as individual sub-services
|
||||
if let Some(sites) = self.get_nginx_sites().await {
|
||||
for site in sites.iter() {
|
||||
// Measure latency for this site
|
||||
let latency = self.measure_site_latency(site).await;
|
||||
// Measure latency and health for this site
|
||||
let (latency, is_healthy) = self.measure_site_latency(site).await;
|
||||
|
||||
// Determine status and description based on latency measurement
|
||||
let (site_status, site_description) = match latency {
|
||||
Some(_ms) => (ServiceStatus::Running, None),
|
||||
None => (ServiceStatus::Stopped, Some(vec!["unreachable".to_string()])),
|
||||
// Determine status and description based on latency and health
|
||||
let (site_status, site_description) = match (latency, is_healthy) {
|
||||
(Some(_ms), true) => (ServiceStatus::Running, None),
|
||||
(Some(_ms), false) => (ServiceStatus::Stopped, Some(vec!["error".to_string()])),
|
||||
(None, _) => (ServiceStatus::Stopped, Some(vec!["unreachable".to_string()])),
|
||||
};
|
||||
|
||||
// Update counters based on site status
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user