Files
cm-dashboard/agent/src/notifications.rs
Christoffer Martinsson cd4764596f Implement comprehensive dashboard improvements and maintenance mode
- Storage widget: Restructure with Name/Temp/Wear/Usage columns, SMART details as descriptions
- Host navigation: Only cycle through connected hosts, no disconnected hosts
- Auto-discovery: Skip config files, use predefined CMTEC host list
- Maintenance mode: Suppress notifications during backup via /tmp/cm-maintenance file
- CPU thresholds: Update to warning ≥9.0, critical ≥10.0 for production use
- Agent-dashboard separation: Agent provides descriptions, dashboard displays only
2025-10-13 11:18:23 +02:00

245 lines
8.9 KiB
Rust
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use std::collections::HashMap;
use std::path::Path;
use chrono::{DateTime, Utc};
use chrono_tz::Europe::Stockholm;
use lettre::{Message, SmtpTransport, Transport};
use serde::{Deserialize, Serialize};
use tracing::{info, error, warn};
/// Settings that control whether and how status-change e-mails are sent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NotificationConfig {
    /// Master switch; when `false` no notification is produced or sent.
    pub enabled: bool,
    /// Host name of the SMTP server used for delivery.
    pub smtp_host: String,
    /// TCP port of the SMTP server.
    pub smtp_port: u16,
    /// Address parsed into the sender of every e-mail.
    pub from_email: String,
    /// Address parsed into the recipient of every e-mail.
    pub to_email: String,
    /// Minimum number of minutes between two e-mails for the same
    /// `component.metric` key.
    pub rate_limit_minutes: u64,
}
impl Default for NotificationConfig {
fn default() -> Self {
Self {
enabled: false,
smtp_host: "localhost".to_string(),
smtp_port: 25,
from_email: "".to_string(),
to_email: "".to_string(),
rate_limit_minutes: 30, // Don't spam notifications
}
}
}
/// A transition of one metric from one status string to another.
#[derive(Debug, Clone, PartialEq)]
pub struct StatusChange {
    /// Component the metric belongs to (first half of the `component.metric` key).
    pub component: String,
    /// Metric name (second half of the `component.metric` key).
    pub metric: String,
    /// Status recorded before the change.
    pub old_status: String,
    /// Status that replaced `old_status`.
    pub new_status: String,
    /// UTC instant at which the change was detected.
    pub timestamp: DateTime<Utc>,
    /// Optional free-form text appended to the notification body.
    pub details: Option<String>,
}
/// Tracks the last known status of every metric and decides when a status
/// change should be e-mailed.
pub struct NotificationManager {
    /// Delivery settings and the master enable switch.
    config: NotificationConfig,
    /// Last seen status per metric; key is `"component.metric"`.
    last_status: HashMap<String, String>,
    /// Details captured while a metric was `warning`/`critical`; key is
    /// `"component.metric"`. Used to enrich the recovery notification.
    last_details: HashMap<String, String>,
    /// Time of the last notification per `"component.metric"` key, used for
    /// rate limiting.
    last_notification: HashMap<String, DateTime<Utc>>,
}
impl NotificationManager {
pub fn new(config: NotificationConfig) -> Self {
Self {
config,
last_status: HashMap::new(),
last_details: HashMap::new(),
last_notification: HashMap::new(),
}
}
/// Records `status` for `component.metric` without any detail text.
///
/// Shorthand for `update_status_with_details` with `details` set to `None`;
/// the return value is whatever that method returns.
pub fn update_status(&mut self, component: &str, metric: &str, status: &str) -> Option<StatusChange> {
    let no_details: Option<String> = None;
    self.update_status_with_details(component, metric, status, no_details)
}
/// Records the latest `status` of `component.metric` and reports a change
/// worth notifying about.
///
/// * The first observation of a key is only stored; it never yields a change.
/// * An unchanged status yields nothing and leaves all state untouched.
/// * A changed status updates the stored status, stores `details` while the
///   metric is `warning`/`critical`, and clears stored details once it is `ok`.
///
/// When the metric recovers to `ok` from `warning`/`critical`, the details
/// stored for the earlier problem are folded into the change's details.
///
/// Returns `Some(StatusChange)` only if the status changed and
/// `should_notify` accepts the transition; otherwise `None`.
pub fn update_status_with_details(&mut self, component: &str, metric: &str, status: &str, details: Option<String>) -> Option<StatusChange> {
    let key = format!("{}.{}", component, metric);
    let is_problem = status == "warning" || status == "critical";

    let previous = match self.last_status.get(&key).cloned() {
        Some(previous) => previous,
        None => {
            // First time seeing this metric - store but don't notify
            self.last_status.insert(key.clone(), status.to_string());
            if is_problem {
                if let Some(detail) = details {
                    self.last_details.insert(key, detail);
                }
            }
            return None;
        }
    };

    if previous == status {
        return None;
    }

    // For recovery notifications, include the original problem details.
    let recovering = status == "ok" && (previous == "warning" || previous == "critical");
    let change_details = if recovering {
        match (self.last_details.get(&key), details.as_ref()) {
            (Some(earlier), Some(current)) => {
                Some(format!("Recovered from: {}\nCurrent status: {}", earlier, current))
            }
            (Some(earlier), None) => Some(format!("Recovered from: {}", earlier)),
            (None, current) => current.cloned(),
        }
    } else {
        details.clone()
    };

    let change = StatusChange {
        component: component.to_string(),
        metric: metric.to_string(),
        old_status: previous,
        new_status: status.to_string(),
        timestamp: Utc::now(),
        details: change_details,
    };

    self.last_status.insert(key.clone(), status.to_string());
    if is_problem {
        // Keep the details around so a later recovery can quote them.
        if let Some(detail) = details {
            self.last_details.insert(key, detail);
        }
    } else if status == "ok" {
        // Recovered: the stored problem details are no longer needed.
        self.last_details.remove(&key);
    }

    if self.should_notify(&change) {
        Some(change)
    } else {
        None
    }
}
/// Decides whether `change` is a transition that warrants a notification.
///
/// Returns `false` whenever notifications are disabled in the config.
/// Otherwise returns `true` for any transition into `warning` or `critical`,
/// and for a transition from `warning`/`critical` to `ok`; every other
/// transition returns `false`. The decision is logged at info level.
fn should_notify(&mut self, change: &StatusChange) -> bool {
    if !self.config.enabled {
        info!("Notifications disabled, skipping {}.{}", change.component, change.metric);
        return false;
    }

    let previous = change.old_status.as_str();
    let current = change.new_status.as_str();

    // Only notify on transitions to warning/critical, or recovery to ok.
    let entered_problem = matches!(current, "warning" | "critical");
    let recovered = current == "ok" && matches!(previous, "warning" | "critical");
    let should_send = entered_problem || recovered;

    info!("Status change {}.{}: {} -> {} (notify: {})",
        change.component, change.metric, change.old_status, change.new_status, should_send);
    should_send
}
/// Checks whether a notification for `change`'s `component.metric` key was
/// already sent within the configured `rate_limit_minutes`.
///
/// Returns `true` (and leaves the stored timestamp untouched) when the last
/// notification for the key is more recent than the limit. Otherwise records
/// the current time as the key's last notification and returns `false`.
/// Note that the timestamp is recorded here, before any e-mail is attempted.
fn is_rate_limited(&mut self, change: &StatusChange) -> bool {
    let key = format!("{}.{}", change.component, change.metric);

    // Sample the clock once so the instant that is compared is also the
    // instant that gets stored.
    let now = Utc::now();

    // Clamp before converting: a bare `as i64` would wrap limits above
    // `i64::MAX` to a negative number and silently disable rate limiting.
    let limit_minutes = self.config.rate_limit_minutes.min(i64::MAX as u64) as i64;

    if let Some(last_time) = self.last_notification.get(&key) {
        let minutes_since = now.signed_duration_since(*last_time).num_minutes();
        if minutes_since < limit_minutes {
            info!("Rate limiting {}.{}: {} minutes since last notification (limit: {})",
                change.component, change.metric, minutes_since, self.config.rate_limit_minutes);
            return true;
        }
    }

    self.last_notification.insert(key, now);
    info!("Not rate limited {}.{}, sending notification", change.component, change.metric);
    false
}
/// Reports whether the maintenance flag file `/tmp/cm-maintenance` exists.
///
/// The file's presence is the only thing checked; its contents are not read.
fn is_maintenance_mode() -> bool {
    let flag_file = Path::new("/tmp/cm-maintenance");
    flag_file.exists()
}
pub async fn send_notification(&mut self, change: StatusChange) {
if !self.config.enabled {
return;
}
if Self::is_maintenance_mode() {
info!("Suppressing notification for {}.{} (maintenance mode active)", change.component, change.metric);
return;
}
if self.is_rate_limited(&change) {
warn!("Rate limiting notification for {}.{}", change.component, change.metric);
return;
}
let subject = self.format_subject(&change);
let body = self.format_body(&change);
if let Err(e) = self.send_email(&subject, &body).await {
error!("Failed to send notification email: {}", e);
} else {
info!("Sent notification: {} {}.{} {} → {}",
change.component, change.component, change.metric,
change.old_status, change.new_status);
}
}
/// Builds the e-mail subject: `<urgency>: <component> <metric> on <hostname>`.
///
/// The urgency label is chosen from the new status (`critical`, `warning`,
/// `ok`), with a generic label for any other status string.
fn format_subject(&self, change: &StatusChange) -> String {
    let status = change.new_status.as_str();
    let urgency = if status == "critical" {
        "🔴 CRITICAL"
    } else if status == "warning" {
        "🟡 WARNING"
    } else if status == "ok" {
        "✅ RESOLVED"
    } else {
        // NOTE(review): this fallback label starts with a space, so the
        // subject does too — confirm whether a leading symbol was intended.
        " STATUS"
    };

    let host = gethostname::gethostname();
    format!("{}: {} {} on {}",
        urgency,
        change.component,
        change.metric,
        host.to_string_lossy())
}
/// Builds the plain-text e-mail body for `change`.
///
/// The body lists host name, component, metric, the status transition and
/// the change time (converted to the Europe/Stockholm zone), followed by the
/// optional details and a footer carrying the generation time.
fn format_body(&self, change: &StatusChange) -> String {
    // Shared by the change time and the footer so both render identically.
    const TIME_FORMAT: &str = "%Y-%m-%d %H:%M:%S CET/CEST";

    let host = gethostname::gethostname();

    // The transition is rendered as "old → new"; without a separator the two
    // statuses ran together (e.g. "okcritical").
    let mut body = format!(
        "Status Change Alert\n\
         \n\
         Host: {}\n\
         Component: {}\n\
         Metric: {}\n\
         Status Change: {} → {}\n\
         Time: {}",
        host.to_string_lossy(),
        change.component,
        change.metric,
        change.old_status,
        change.new_status,
        change.timestamp.with_timezone(&Stockholm).format(TIME_FORMAT)
    );

    if let Some(details) = &change.details {
        body.push_str("\n\nDetails:\n");
        body.push_str(details);
    }

    body.push_str(&format!(
        "\n\n--\n\
         CM Dashboard Agent\n\
         Generated at {}",
        Utc::now().with_timezone(&Stockholm).format(TIME_FORMAT)
    ));

    body
}
/// Builds an e-mail from `subject` and `body` and hands it to the configured
/// SMTP host and port.
///
/// # Errors
///
/// Returns an error if the configured sender or recipient address fails to
/// parse, if the message cannot be built, or if the SMTP send fails.
async fn send_email(&self, subject: &str, body: &str) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    let envelope = Message::builder()
        .from(self.config.from_email.parse()?)
        .to(self.config.to_email.parse()?)
        .subject(subject);
    let message = envelope.body(body.to_string())?;

    // NOTE(review): `builder_dangerous` is documented by lettre as creating
    // an unencrypted connection — confirm the SMTP host is local/trusted.
    let transport = SmtpTransport::builder_dangerous(&self.config.smtp_host)
        .port(self.config.smtp_port)
        .build();

    // NOTE(review): this send is not awaited, so it runs synchronously
    // inside an async fn — confirm blocking the executor here is acceptable.
    transport.send(&message)?;

    Ok(())
}
}