Fix nginx site monitoring to properly detect errors
- Return error status for any response that is not 2xx/3xx (e.g. HTTP 502 and other 5xx) instead of success
- Show 'error' description for sites that are reachable but return a wrong status code
- Show 'unreachable' description for complete connection failures
- Each nginx site now has an independent status based on its actual health
- Sites with timeouts or server errors will trigger notifications
This commit is contained in:
parent
e64527ce2f
commit
355a986582
@ -832,7 +832,8 @@ impl ServiceCollector {
|
|||||||
std::env::var("UID").unwrap_or_default() == "0"
|
std::env::var("UID").unwrap_or_default() == "0"
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn measure_site_latency(&self, site_name: &str) -> Option<f32> {
|
async fn measure_site_latency(&self, site_name: &str) -> (Option<f32>, bool) {
|
||||||
|
// Returns (latency, is_healthy)
|
||||||
// Construct URL from site name
|
// Construct URL from site name
|
||||||
let url = if site_name.contains("localhost") || site_name.contains("127.0.0.1") {
|
let url = if site_name.contains("localhost") || site_name.contains("127.0.0.1") {
|
||||||
format!("http://{}", site_name)
|
format!("http://{}", site_name)
|
||||||
@ -841,10 +842,13 @@ impl ServiceCollector {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Create HTTP client with short timeout
|
// Create HTTP client with short timeout
|
||||||
let client = reqwest::Client::builder()
|
let client = match reqwest::Client::builder()
|
||||||
.timeout(Duration::from_secs(5))
|
.timeout(Duration::from_secs(5))
|
||||||
.build()
|
.build()
|
||||||
.ok()?;
|
{
|
||||||
|
Ok(client) => client,
|
||||||
|
Err(_) => return (None, false),
|
||||||
|
};
|
||||||
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
@ -852,16 +856,12 @@ impl ServiceCollector {
|
|||||||
match client.head(&url).send().await {
|
match client.head(&url).send().await {
|
||||||
Ok(response) => {
|
Ok(response) => {
|
||||||
let latency = start.elapsed().as_millis() as f32;
|
let latency = start.elapsed().as_millis() as f32;
|
||||||
if response.status().is_success() || response.status().is_redirection() {
|
let is_healthy = response.status().is_success() || response.status().is_redirection();
|
||||||
Some(latency)
|
(Some(latency), is_healthy)
|
||||||
} else {
|
|
||||||
// Site is reachable but returned error, still measure latency
|
|
||||||
Some(latency)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
// Connection failed, no latency measurement
|
// Connection failed, no latency measurement, not healthy
|
||||||
None
|
(None, false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1355,13 +1355,14 @@ impl Collector for ServiceCollector {
|
|||||||
// Add nginx sites as individual sub-services
|
// Add nginx sites as individual sub-services
|
||||||
if let Some(sites) = self.get_nginx_sites().await {
|
if let Some(sites) = self.get_nginx_sites().await {
|
||||||
for site in sites.iter() {
|
for site in sites.iter() {
|
||||||
// Measure latency for this site
|
// Measure latency and health for this site
|
||||||
let latency = self.measure_site_latency(site).await;
|
let (latency, is_healthy) = self.measure_site_latency(site).await;
|
||||||
|
|
||||||
// Determine status and description based on latency measurement
|
// Determine status and description based on latency and health
|
||||||
let (site_status, site_description) = match latency {
|
let (site_status, site_description) = match (latency, is_healthy) {
|
||||||
Some(_ms) => (ServiceStatus::Running, None),
|
(Some(_ms), true) => (ServiceStatus::Running, None),
|
||||||
None => (ServiceStatus::Stopped, Some(vec!["unreachable".to_string()])),
|
(Some(_ms), false) => (ServiceStatus::Stopped, Some(vec!["error".to_string()])),
|
||||||
|
(None, _) => (ServiceStatus::Stopped, Some(vec!["unreachable".to_string()])),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Update counters based on site status
|
// Update counters based on site status
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user