Implement comprehensive monitoring improvements

- Add full email notifications with lettre and Stockholm timezone
- Add status persistence to prevent notification spam on restart
- Change nginx monitoring to check backend proxy_pass URLs instead of frontend domains
- Increase nginx site timeout to 10 seconds for backend health checks
- Fix cache intervals: disk (5min), backup (10min), systemd (30s), cpu/memory (5s)
- Remove rate limiting for immediate notifications on all status changes
- Store metric status in /var/lib/cm-dashboard/last-status.json
2025-10-20 14:32:44 +02:00
parent ecaf3aedb5
commit 66a79574e0
5 changed files with 260 additions and 95 deletions
--- a/agent/src/metrics/mod.rs
+++ b/agent/src/metrics/mod.rs
@@ -182,11 +182,13 @@ impl MetricCollectionManager {
        for collector in &self.collectors {
            let collector_name = collector.name();

-            // Determine cache interval for this collector type - ALL REALTIME FOR FAST UPDATES
+            // Determine cache interval for this collector type based on data volatility
            let cache_interval_secs = match collector_name {
-                "cpu" | "memory" | "disk" | "systemd" => 2, // All realtime for fast updates
-                "backup" => 10, // Backup metrics every 10 seconds for testing
-                _ => 2,         // All realtime for fast updates
+                "cpu" | "memory" => 5,    // Fast updates for volatile metrics
+                "systemd" => 30,          // Service status changes less frequently
+                "disk" => 300,            // SMART data changes very slowly (5 minutes)
+                "backup" => 600,          // Backup status changes rarely (10 minutes)
+                _ => 30,                  // Default: moderate frequency
            };

            let should_collect =