This commit is contained in:
2025-10-13 00:28:06 +02:00
parent 57b676ad25
commit c68ccf023e
4 changed files with 335 additions and 84 deletions

View File

@@ -41,6 +41,7 @@ pub struct StatusChange {
/// Tracks the last known status of each monitored metric so that status
/// changes can be detected and turned into notifications.
pub struct NotificationManager {
/// Notification settings supplied at construction time.
config: NotificationConfig,
/// Last observed status per metric.
/// Key: "component.metric"; value: the status string.
last_status: HashMap<String, String>,
/// Details captured while a metric was in a warning/critical state, kept so
/// they can be included in a later recovery notification.
/// Key: "component.metric"; value: the detail text.
last_details: HashMap<String, String>,
/// Timestamps used for rate limiting.
/// NOTE(review): the key format is not visible here — presumably the same
/// "component.metric" key as the maps above; confirm against `should_notify`.
last_notification: HashMap<String, DateTime<Utc>>,
}
@@ -49,6 +50,7 @@ impl NotificationManager {
Self {
config,
last_status: HashMap::new(),
last_details: HashMap::new(),
last_notification: HashMap::new(),
}
}
@@ -63,16 +65,39 @@ impl NotificationManager {
if let Some(old) = &old_status {
if old != status {
// For recovery notifications, include original problem details
let change_details = if status == "ok" && (old == "warning" || old == "critical") {
// Recovery: combine current status details with what we recovered from
let old_details = self.last_details.get(&key).cloned();
match (old_details, &details) {
(Some(old_detail), Some(current_detail)) => Some(format!("Recovered from: {}\nCurrent status: {}", old_detail, current_detail)),
(Some(old_detail), None) => Some(format!("Recovered from: {}", old_detail)),
(None, current) => current.clone(),
}
} else {
details.clone()
};
let change = StatusChange {
component: component.to_string(),
metric: metric.to_string(),
old_status: old.clone(),
new_status: status.to_string(),
timestamp: Utc::now(),
details,
details: change_details,
};
self.last_status.insert(key, status.to_string());
self.last_status.insert(key.clone(), status.to_string());
// Store details for warning/critical states (for future recovery notifications)
if status == "warning" || status == "critical" {
if let Some(ref detail) = details {
self.last_details.insert(key.clone(), detail.clone());
}
} else if status == "ok" {
// Clear stored details after recovery
self.last_details.remove(&key);
}
if self.should_notify(&change) {
return Some(change);
@@ -80,7 +105,10 @@ impl NotificationManager {
}
} else {
// First time seeing this metric - store but don't notify
self.last_status.insert(key, status.to_string());
self.last_status.insert(key.clone(), status.to_string());
if (status == "warning" || status == "critical") && details.is_some() {
self.last_details.insert(key, details.unwrap());
}
}
None

View File

@@ -193,7 +193,8 @@ impl SimpleAgent {
if let Some(summary) = output.data.get("summary") {
// Check CPU status
if let Some(cpu_status) = summary.get("cpu_status").and_then(|v| v.as_str()) {
if let Some(change) = self.notification_manager.update_status("system", "cpu", cpu_status) {
let cpu_details = self.build_cpu_details(summary);
if let Some(change) = self.notification_manager.update_status_with_details("system", "cpu", cpu_status, cpu_details) {
info!("CPU status change detected: {} -> {}", change.old_status, change.new_status);
self.notification_manager.send_notification(change).await;
}
@@ -201,7 +202,8 @@ impl SimpleAgent {
// Check memory status
if let Some(memory_status) = summary.get("memory_status").and_then(|v| v.as_str()) {
if let Some(change) = self.notification_manager.update_status("system", "memory", memory_status) {
let memory_details = self.build_memory_details(summary);
if let Some(change) = self.notification_manager.update_status_with_details("system", "memory", memory_status, memory_details) {
info!("Memory status change detected: {} -> {}", change.old_status, change.new_status);
self.notification_manager.send_notification(change).await;
}
@@ -209,7 +211,8 @@ impl SimpleAgent {
// Check CPU temp status (optional)
if let Some(cpu_temp_status) = summary.get("cpu_temp_status").and_then(|v| v.as_str()) {
if let Some(change) = self.notification_manager.update_status("system", "cpu_temp", cpu_temp_status) {
let temp_details = self.build_cpu_temp_details(summary);
if let Some(change) = self.notification_manager.update_status_with_details("system", "cpu_temp", cpu_temp_status, temp_details) {
info!("CPU temp status change detected: {} -> {}", change.old_status, change.new_status);
self.notification_manager.send_notification(change).await;
}
@@ -232,6 +235,30 @@ impl SimpleAgent {
}
}
/// Formats the 1/5/15-minute CPU load averages found in `summary` as a
/// single human-readable line for use as notification details.
///
/// Always returns `Some`; a load value that is missing or not numeric is
/// reported as `0.00`.
fn build_cpu_details(&self, summary: &serde_json::Value) -> Option<String> {
    // Shared lookup: numeric field from the summary, or 0.0 when unavailable.
    let load_avg = |field: &str| -> f64 {
        match summary.get(field).and_then(|value| value.as_f64()) {
            Some(load) => load,
            None => 0.0,
        }
    };

    let one_min = load_avg("cpu_load_1");
    let five_min = load_avg("cpu_load_5");
    let fifteen_min = load_avg("cpu_load_15");

    let line = format!("CPU load (1/5/15min): {:.2} / {:.2} / {:.2}", one_min, five_min, fifteen_min);
    Some(line)
}
/// Formats memory usage from `summary` as "used / total GB (percent%)" for
/// use as notification details.
///
/// Always returns `Some`. Missing or non-numeric fields fall back to
/// `0.0` (used MB, usage percent) and `1.0` (total MB).
fn build_memory_details(&self, summary: &serde_json::Value) -> Option<String> {
    // The summary reports megabytes; the message is expressed in gigabytes.
    const MB_PER_GB: f64 = 1024.0;

    // Numeric field from the summary, or the given fallback when unavailable.
    let number_or = |field: &str, fallback: f64| -> f64 {
        summary.get(field).and_then(|value| value.as_f64()).unwrap_or(fallback)
    };

    let used_gb = number_or("memory_used_mb", 0.0) / MB_PER_GB;
    let total_gb = number_or("memory_total_mb", 1.0) / MB_PER_GB;
    let percent = number_or("memory_usage_percent", 0.0);

    Some(format!("Memory usage: {:.1} / {:.1} GB ({:.1}%)", used_gb, total_gb, percent))
}
/// Formats the CPU temperature from `summary` for use as notification
/// details.
///
/// Returns `None` when `cpu_temp_c` is absent or not numeric, so callers
/// can omit the detail line instead of reporting a made-up temperature.
fn build_cpu_temp_details(&self, summary: &serde_json::Value) -> Option<String> {
    // `Option::map` replaces the hand-written `if let Some(..) { Some(..) } else { None }`.
    summary
        .get("cpu_temp_c")
        .and_then(|v| v.as_f64())
        .map(|temp_c| format!("CPU temperature: {:.1}°C", temp_c))
}
fn build_service_failure_details(&self, output: &crate::collectors::CollectorOutput) -> Option<String> {
if let Some(services) = output.data.get("services").and_then(|v| v.as_array()) {
let mut failed_services = Vec::new();