Compare commits
2 Commits
0cb69ea8fa
...
407329657f
| Author | SHA1 | Date | |
|---|---|---|---|
| 407329657f | |||
| a64464142c |
@ -986,21 +986,11 @@ impl ServiceCollector {
|
||||
}
|
||||
|
||||
|
||||
// Check which sites are actually accessible
|
||||
let mut accessible_sites = Vec::new();
|
||||
for site in sites {
|
||||
if self.check_site_accessibility(&site).await {
|
||||
accessible_sites.push(site); // Remove checkmark - status will be shown via sub_service row status
|
||||
}
|
||||
}
|
||||
|
||||
// Limit to reasonable number
|
||||
accessible_sites.truncate(15);
|
||||
|
||||
if accessible_sites.is_empty() {
|
||||
// Return all sites from nginx config (monitor all, regardless of current status)
|
||||
if sites.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(accessible_sites)
|
||||
Some(sites)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1048,34 +1038,6 @@ impl ServiceCollector {
|
||||
}
|
||||
}
|
||||
|
||||
async fn check_site_accessibility(&self, hostname: &str) -> bool {
|
||||
// Create HTTP client with same timeout as site latency checks
|
||||
let client = match reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(2))
|
||||
.build()
|
||||
{
|
||||
Ok(client) => client,
|
||||
Err(_) => return false,
|
||||
};
|
||||
|
||||
// Try HTTPS first, then HTTP
|
||||
for scheme in ["https", "http"] {
|
||||
let url = format!("{}://{}", scheme, hostname);
|
||||
|
||||
match client.get(&url).send().await {
|
||||
Ok(response) => {
|
||||
let status = response.status().as_u16();
|
||||
// Check for successful HTTP status codes (same logic as before)
|
||||
if status == 200 || status == 301 || status == 302 || status == 403 {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
Err(_) => continue,
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
async fn get_nginx_description(&self) -> Option<String> {
|
||||
// Get site count and active connections
|
||||
|
||||
@ -163,164 +163,48 @@ impl SimpleAgent {
|
||||
}
|
||||
|
||||
async fn check_status_changes(&mut self, output: &crate::collectors::CollectorOutput) {
|
||||
// Extract status from collector output and check for changes
|
||||
match output.agent_type {
|
||||
AgentType::Service => {
|
||||
if let Some(summary) = output.data.get("summary") {
|
||||
// Check services status
|
||||
if let Some(services_status) = summary.get("services_status").and_then(|v| v.as_str()) {
|
||||
let details = self.build_service_failure_details(output);
|
||||
if let Some(change) = self.notification_manager.update_status_with_details("system", "services", services_status, details) {
|
||||
self.notification_manager.send_notification(change).await;
|
||||
// Generic status change detection for all agents
|
||||
self.scan_for_status_changes(&output.data, &format!("{:?}", output.agent_type)).await;
|
||||
}
|
||||
|
||||
async fn scan_for_status_changes(&mut self, data: &serde_json::Value, agent_name: &str) {
|
||||
// Recursively scan JSON for any field ending in "_status"
|
||||
self.scan_object_for_status(data, agent_name, "").await;
|
||||
}
|
||||
|
||||
async fn scan_object_for_status(&mut self, value: &serde_json::Value, agent_name: &str, path: &str) {
|
||||
match value {
|
||||
serde_json::Value::Object(obj) => {
|
||||
for (key, val) in obj {
|
||||
let current_path = if path.is_empty() { key.clone() } else { format!("{}.{}", path, key) };
|
||||
|
||||
if key.ends_with("_status") && val.is_string() {
|
||||
// Found a status field - check for changes
|
||||
if let Some(status) = val.as_str() {
|
||||
let component = agent_name.to_lowercase();
|
||||
let metric = key.trim_end_matches("_status");
|
||||
let description = format!("Agent: {}, Component: {}, Source: {}", agent_name, component, current_path);
|
||||
|
||||
if let Some(change) = self.notification_manager.update_status_with_details(&component, metric, status, Some(description)) {
|
||||
info!("Status change: {} {} -> {}", current_path, change.old_status, change.new_status);
|
||||
self.notification_manager.send_notification(change).await;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Recursively scan nested objects
|
||||
self.scan_object_for_status(val, agent_name, &current_path).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
AgentType::Smart => {
|
||||
if let Some(status) = output.data.get("status").and_then(|v| v.as_str()) {
|
||||
let normalized_status = match status {
|
||||
"HEALTHY" => "ok",
|
||||
"WARNING" => "warning",
|
||||
"CRITICAL" => "critical",
|
||||
_ => "unknown"
|
||||
};
|
||||
if let Some(change) = self.notification_manager.update_status("storage", "smart", normalized_status) {
|
||||
self.notification_manager.send_notification(change).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
AgentType::System => {
|
||||
if let Some(summary) = output.data.get("summary") {
|
||||
// Check CPU status
|
||||
if let Some(cpu_status) = summary.get("cpu_status").and_then(|v| v.as_str()) {
|
||||
let cpu_details = self.build_cpu_details(summary);
|
||||
if let Some(change) = self.notification_manager.update_status_with_details("system", "cpu", cpu_status, cpu_details) {
|
||||
info!("CPU status change detected: {} -> {}", change.old_status, change.new_status);
|
||||
self.notification_manager.send_notification(change).await;
|
||||
}
|
||||
}
|
||||
|
||||
// Check memory status
|
||||
if let Some(memory_status) = summary.get("memory_status").and_then(|v| v.as_str()) {
|
||||
let memory_details = self.build_memory_details(summary);
|
||||
if let Some(change) = self.notification_manager.update_status_with_details("system", "memory", memory_status, memory_details) {
|
||||
info!("Memory status change detected: {} -> {}", change.old_status, change.new_status);
|
||||
self.notification_manager.send_notification(change).await;
|
||||
}
|
||||
}
|
||||
|
||||
// Check CPU temp status (optional)
|
||||
if let Some(cpu_temp_status) = summary.get("cpu_temp_status").and_then(|v| v.as_str()) {
|
||||
let temp_details = self.build_cpu_temp_details(summary);
|
||||
if let Some(change) = self.notification_manager.update_status_with_details("system", "cpu_temp", cpu_temp_status, temp_details) {
|
||||
info!("CPU temp status change detected: {} -> {}", change.old_status, change.new_status);
|
||||
self.notification_manager.send_notification(change).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
AgentType::Backup => {
|
||||
if let Some(status) = output.data.get("overall_status") {
|
||||
let status_str = match status.as_str() {
|
||||
Some("Healthy") => "ok",
|
||||
Some("Warning") => "warning",
|
||||
Some("Failed") => "critical",
|
||||
_ => "unknown"
|
||||
};
|
||||
if let Some(change) = self.notification_manager.update_status("backup", "overall", status_str) {
|
||||
self.notification_manager.send_notification(change).await;
|
||||
}
|
||||
serde_json::Value::Array(arr) => {
|
||||
// Scan array elements for individual item status tracking
|
||||
for (index, item) in arr.iter().enumerate() {
|
||||
let item_path = format!("{}[{}]", path, index);
|
||||
self.scan_object_for_status(item, agent_name, &item_path).await;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn build_cpu_details(&self, summary: &serde_json::Value) -> Option<String> {
|
||||
let cpu_load_1 = summary.get("cpu_load_1").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let cpu_load_5 = summary.get("cpu_load_5").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let cpu_load_15 = summary.get("cpu_load_15").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
|
||||
Some(format!("CPU load (1/5/15min): {:.2} / {:.2} / {:.2}", cpu_load_1, cpu_load_5, cpu_load_15))
|
||||
}
|
||||
|
||||
fn build_memory_details(&self, summary: &serde_json::Value) -> Option<String> {
|
||||
let used_mb = summary.get("memory_used_mb").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let total_mb = summary.get("memory_total_mb").and_then(|v| v.as_f64()).unwrap_or(1.0);
|
||||
let usage_percent = summary.get("memory_usage_percent").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
|
||||
Some(format!("Memory usage: {:.1} / {:.1} GB ({:.1}%)", used_mb / 1024.0, total_mb / 1024.0, usage_percent))
|
||||
}
|
||||
|
||||
fn build_cpu_temp_details(&self, summary: &serde_json::Value) -> Option<String> {
|
||||
if let Some(temp_c) = summary.get("cpu_temp_c").and_then(|v| v.as_f64()) {
|
||||
Some(format!("CPU temperature: {:.1}°C", temp_c))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn build_service_failure_details(&self, output: &crate::collectors::CollectorOutput) -> Option<String> {
|
||||
if let Some(services) = output.data.get("services").and_then(|v| v.as_array()) {
|
||||
let mut failed_services = Vec::new();
|
||||
let mut degraded_services = Vec::new();
|
||||
|
||||
for service in services {
|
||||
if let (Some(name), Some(status)) = (
|
||||
service.get("name").and_then(|v| v.as_str()),
|
||||
service.get("status").and_then(|v| v.as_str())
|
||||
) {
|
||||
match status {
|
||||
"Stopped" => {
|
||||
let memory = service.get("memory_used_mb")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0);
|
||||
let disk = service.get("disk_used_gb")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0);
|
||||
failed_services.push(format!("{} (stopped, was using {:.1}MB RAM, {:.1}GB disk)",
|
||||
name, memory, disk));
|
||||
},
|
||||
"Degraded" | "Restarting" => {
|
||||
let memory = service.get("memory_used_mb")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0);
|
||||
let disk = service.get("disk_used_gb")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0);
|
||||
degraded_services.push(format!("{} ({}, using {:.1}MB RAM, {:.1}GB disk)",
|
||||
name, status.to_lowercase(), memory, disk));
|
||||
},
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !failed_services.is_empty() || !degraded_services.is_empty() {
|
||||
let mut details = String::new();
|
||||
|
||||
if !failed_services.is_empty() {
|
||||
details.push_str("Failed services:\n");
|
||||
for service in &failed_services {
|
||||
details.push_str(&format!("- {}\n", service));
|
||||
}
|
||||
}
|
||||
|
||||
if !degraded_services.is_empty() {
|
||||
if !details.is_empty() {
|
||||
details.push('\n');
|
||||
}
|
||||
details.push_str("Degraded services:\n");
|
||||
for service in &degraded_services {
|
||||
details.push_str(&format!("- {}\n", service));
|
||||
}
|
||||
}
|
||||
|
||||
Some(details.trim_end().to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -109,10 +109,17 @@ fn render_metrics(
|
||||
// Add latency information for nginx sites if available
|
||||
let service_name_with_latency = if let Some(parent) = &svc.sub_service {
|
||||
if parent == "nginx" {
|
||||
// Extract subdomain part for shorter display
|
||||
let short_name = if let Some(dot_pos) = svc.name.find('.') {
|
||||
&svc.name[..dot_pos]
|
||||
} else {
|
||||
&svc.name
|
||||
};
|
||||
|
||||
match &svc.latency_ms {
|
||||
Some(latency) if *latency >= 2000.0 => format!("{} → unreachable", svc.name), // Timeout (2s+)
|
||||
Some(latency) => format!("{} → {:.0}ms", svc.name, latency),
|
||||
None => format!("{} → unreachable", svc.name), // Connection failed
|
||||
Some(latency) if *latency >= 2000.0 => format!("{} → unreachable", short_name), // Timeout (2s+)
|
||||
Some(latency) => format!("{} → {:.0}ms", short_name, latency),
|
||||
None => format!("{} → unreachable", short_name), // Connection failed
|
||||
}
|
||||
} else {
|
||||
svc.name.clone()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user