From 9e344fb66d48bce166538ddc5d2265046b9c926b Mon Sep 17 00:00:00 2001 From: Christoffer Martinsson Date: Sun, 12 Oct 2025 22:31:46 +0200 Subject: [PATCH] Testing --- CLAUDE.md | 106 +++++++++++++++++++++++++++----- Cargo.lock | 88 ++++++++++++++++++++++++++ agent/Cargo.toml | 3 +- agent/src/collectors/backup.rs | 38 ++++-------- agent/src/collectors/mod.rs | 8 --- agent/src/collectors/service.rs | 24 ++++---- agent/src/collectors/smart.rs | 18 ++---- agent/src/notifications.rs | 20 ++++-- agent/src/simple_agent.rs | 11 +++- dashboard/src/app.rs | 2 +- dashboard/src/data/metrics.rs | 4 +- dashboard/src/ui/alerts.rs | 43 +++---------- dashboard/src/ui/backup.rs | 54 +--------------- dashboard/src/ui/services.rs | 22 +------ dashboard/src/ui/storage.rs | 19 +----- dashboard/src/ui/system.rs | 86 +++++++++----------------- dashboard/src/ui/widget.rs | 34 ---------- 17 files changed, 283 insertions(+), 297 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 0a672c7..123e64f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -184,25 +184,103 @@ Keys: [←→] hosts [r]efresh [q]uit Keys: [Enter] details [r]efresh [s]ort [f]ilter [q]uit ``` -## Development Status +## Architecture Principles - CRITICAL -### Immediate TODOs +### Agent-Dashboard Separation of Concerns -- Refactor all dashboard widgets to use a shared table/layout helper so icons, padding, and titles remain consistent across panels +**AGENT IS SINGLE SOURCE OF TRUTH FOR ALL STATUS CALCULATIONS** +- Agent calculates status ("ok"/"warning"/"critical"/"unknown") using defined thresholds +- Agent sends status to dashboard via ZMQ +- Dashboard NEVER calculates status - only displays what agent provides -- Investigate why the backup metrics agent is not publishing data to the dashboard -- Resize the services widget so it can display more services without truncation -- Remove the dedicated status widget and redistribute the layout space -- Add responsive scaling within each widget so columns and content adapt dynamically 
+**Data Flow Architecture:** +``` +Agent (calculations + thresholds) → Status → Dashboard (display only) → TableBuilder (colors) +``` -### Phase 3: Advanced Features 🚧 IN PROGRESS +**Status Handling Rules:** +- Agent provides status → Dashboard uses agent status +- Agent doesn't provide status → Dashboard shows "unknown" (NOT "ok") +- Dashboard widgets NEVER contain hardcoded thresholds +- TableBuilder converts status to colors for display -- [x] ZMQ gossip network implementation -- [x] Comprehensive error handling -- [x] Performance optimizations -- [ ] Predictive analytics for wear levels -- [ ] Custom alert rules engine -- [ ] Historical data export capabilities +### Current Agent Thresholds (as of 2025-10-12) + +**CPU Load (service.rs:392-400):** +- Warning: ≥ 2.0 (testing value, was 5.0) +- Critical: ≥ 4.0 (testing value, was 8.0) + +**CPU Temperature (service.rs:412-420):** +- Warning: ≥ 70.0°C +- Critical: ≥ 80.0°C + +**Memory Usage (service.rs:402-410):** +- Warning: ≥ 80% +- Critical: ≥ 95% + +### Email Notifications + +**System Configuration:** +- From: `{hostname}@cmtec.se` (e.g., cmbox@cmtec.se) +- To: `cm@cmtec.se` +- SMTP: localhost:25 (postfix) +- Timezone: Europe/Stockholm (not UTC) + +**Notification Triggers:** +- Status degradation: any → "warning" or "critical" +- Recovery: "warning"/"critical" → "ok" +- Rate limiting: configurable (set to 0 for testing, 30 minutes for production) + +**Monitored Components:** +- system.cpu (load status) +- system.cpu_temp (temperature status) +- system.memory (usage status) +- system.services (service health status) +- storage.smart (drive health) +- backup.overall (backup status) + +### Pure Auto-Discovery Implementation + +**Agent Configuration:** +- No config files required +- Auto-detects storage devices, services, backup systems +- Runtime discovery of system capabilities +- CLI: `cm-dashboard-agent [-v]` (only verbose flag) + +**Service Discovery:** +- Scans running systemd services +- Filters by predefined 
interesting patterns (gitea, nginx, docker, etc.) +- No host-specific hardcoded service lists + +### Current Implementation Status + +**Completed:** +- [x] Pure auto-discovery agent (no config files) +- [x] Agent-side status calculations with defined thresholds +- [x] Dashboard displays agent status (no dashboard calculations) +- [x] Email notifications with Stockholm timezone +- [x] CPU temperature monitoring and notifications +- [x] ZMQ message format standardization +- [x] Removed all hardcoded dashboard thresholds + +**Testing Configuration (REVERT FOR PRODUCTION):** +- CPU thresholds lowered to 2.0/4.0 for easy testing +- Email rate limiting disabled (0 minutes) + +### Development Guidelines + +**When Adding New Metrics:** +1. Agent calculates status with thresholds +2. Agent adds `{metric}_status` field to JSON output +3. Dashboard data structure adds `{metric}_status: Option<String>` +4. Dashboard uses `status_level_from_agent_status()` for display +5. Agent adds notification monitoring for status changes + +**NEVER:** +- Add hardcoded thresholds to dashboard widgets +- Calculate status in dashboard with different thresholds than agent +- Use "ok" as default when agent status is missing (use "unknown") +- Calculate colors in widgets (TableBuilder's responsibility) # Important Communication Guidelines diff --git a/Cargo.lock b/Cargo.lock index 89c34f6..b15c1be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -220,6 +220,28 @@ dependencies = [ "windows-link", ] +[[package]] +name = "chrono-tz" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +dependencies = [ + "parse-zoneinfo", + "phf", + 
"phf_codegen", +] + [[package]] name = "chumsky" version = "0.9.3" @@ -298,6 +320,7 @@ dependencies = [ "anyhow", "async-trait", "chrono", + "chrono-tz", "clap", "cm-dashboard-shared", "futures", @@ -1078,6 +1101,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + [[package]] name = "paste" version = "1.0.15" @@ -1090,6 +1122,44 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1248,6 +1318,18 @@ dependencies = [ "bitflags 2.9.4", ] +[[package]] +name = "regex" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4a52d8d02cacdb176ef4678de6c052efb4b3da14b78e4db683a4252762be5433" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + [[package]] name = "regex-automata" version = "0.4.12" @@ -1395,6 +1477,12 @@ dependencies = [ "libc", ] +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slab" version = "0.4.11" diff --git a/agent/Cargo.toml b/agent/Cargo.toml index 5d152a8..3bb06e4 100644 --- a/agent/Cargo.toml +++ b/agent/Cargo.toml @@ -10,7 +10,8 @@ async-trait = "0.1" clap = { version = "4.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -chrono = { version = "0.4", features = ["serde"] } +chrono = { version = "0.4", features = ["serde", "clock"] } +chrono-tz = "0.8" thiserror = "1.0" tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] } diff --git a/agent/src/collectors/backup.rs b/agent/src/collectors/backup.rs index c0e4652..fb6aa55 100644 --- a/agent/src/collectors/backup.rs +++ b/agent/src/collectors/backup.rs @@ -12,7 +12,6 @@ use super::{AgentType, Collector, CollectorError, CollectorOutput}; #[derive(Debug, Clone)] pub struct BackupCollector { - pub enabled: bool, pub interval: Duration, pub restic_repo: Option, pub backup_service: String, @@ -21,13 +20,12 @@ pub struct BackupCollector { impl BackupCollector { pub fn new( - enabled: bool, + _enabled: bool, interval_ms: u64, restic_repo: Option, backup_service: String, ) -> Self { Self { - enabled, interval: Duration::from_millis(interval_ms), restic_repo, backup_service, @@ -300,13 +298,6 @@ impl Collector for BackupCollector { self.interval } - fn is_enabled(&self) -> bool { - self.enabled - } - - fn requires_root(&self) -> bool { - false // Depends on restic repo permissions - } async fn collect(&self) -> Result { // Try to get 
borgbackup metrics first, fall back to restic if not available @@ -383,9 +374,17 @@ impl Collector for BackupCollector { last_message: None, }); + // Convert BackupStatus to standardized string format + let status_string = match overall_status { + BackupStatus::Healthy => "ok", + BackupStatus::Warning => "warning", + BackupStatus::Failed => "critical", + BackupStatus::Unknown => "unknown", + }; + // Add disk information if available from borgbackup metrics let mut backup_json = json!({ - "overall_status": overall_status, + "overall_status": status_string, "backup": backup_info, "service": service_data, "timestamp": Utc::now() @@ -407,7 +406,6 @@ impl Collector for BackupCollector { Ok(CollectorOutput { agent_type: AgentType::Backup, data: backup_metrics, - timestamp: Utc::now(), }) } } @@ -457,39 +455,25 @@ struct JournalEntry { // Borgbackup metrics structure from backup script #[derive(Debug, Deserialize)] struct BorgbackupMetrics { - backup_name: String, - start_time: String, - end_time: String, - duration_seconds: i64, status: String, - exit_codes: ExitCodes, repository: Repository, backup_disk: BackupDisk, timestamp: i64, } -#[derive(Debug, Deserialize)] -struct ExitCodes { - global: i32, - backup: i32, - prune: i32, - compact: i32, -} - #[derive(Debug, Deserialize)] struct Repository { total_archives: i32, latest_archive_size_bytes: i64, total_repository_size_bytes: i64, - path: String, } + #[derive(Debug, Deserialize)] struct BackupDisk { device: String, health: String, total_bytes: i64, used_bytes: i64, - available_bytes: i64, usage_percent: f32, } diff --git a/agent/src/collectors/mod.rs b/agent/src/collectors/mod.rs index 8834fec..4433afe 100644 --- a/agent/src/collectors/mod.rs +++ b/agent/src/collectors/mod.rs @@ -1,5 +1,4 @@ use async_trait::async_trait; -use chrono::{DateTime, Utc}; use serde_json::Value; use std::time::Duration; @@ -17,7 +16,6 @@ pub use cm_dashboard_shared::envelope::AgentType; pub struct CollectorOutput { pub agent_type: AgentType, 
pub data: Value, - pub timestamp: DateTime, } #[async_trait] @@ -26,10 +24,4 @@ pub trait Collector: Send + Sync { fn agent_type(&self) -> AgentType; fn collect_interval(&self) -> Duration; async fn collect(&self) -> Result; - fn is_enabled(&self) -> bool { - true - } - fn requires_root(&self) -> bool { - false - } } diff --git a/agent/src/collectors/service.rs b/agent/src/collectors/service.rs index 048b4fb..68a2778 100644 --- a/agent/src/collectors/service.rs +++ b/agent/src/collectors/service.rs @@ -13,7 +13,6 @@ use super::{AgentType, Collector, CollectorError, CollectorOutput}; #[derive(Debug, Clone)] pub struct ServiceCollector { - pub enabled: bool, pub interval: Duration, pub services: Vec, pub timeout_ms: u64, @@ -29,9 +28,8 @@ pub(crate) struct CpuSample { } impl ServiceCollector { - pub fn new(enabled: bool, interval_ms: u64, services: Vec) -> Self { + pub fn new(_enabled: bool, interval_ms: u64, services: Vec) -> Self { Self { - enabled, interval: Duration::from_millis(interval_ms), services, timeout_ms: 10000, // 10 second timeout for service checks @@ -409,6 +407,16 @@ impl ServiceCollector { } } + fn determine_cpu_temp_status(&self, temp_c: f32) -> String { + if temp_c >= 80.0 { + "critical".to_string() + } else if temp_c >= 70.0 { + "warning".to_string() + } else { + "ok".to_string() + } + } + fn determine_services_status(&self, healthy: usize, degraded: usize, failed: usize) -> String { if failed > 0 { "critical".to_string() @@ -929,13 +937,6 @@ impl Collector for ServiceCollector { self.interval } - fn is_enabled(&self) -> bool { - self.enabled - } - - fn requires_root(&self) -> bool { - false // Most systemctl commands work without root - } async fn collect(&self) -> Result { let mut services = Vec::new(); @@ -1013,6 +1014,7 @@ impl Collector for ServiceCollector { let cpu_cstate_info = self.get_cpu_cstate_info().await; let cpu_temp_c = self.get_cpu_temperature_c().await; + let cpu_temp_status = cpu_temp_c.map(|temp| 
self.determine_cpu_temp_status(temp)); let (gpu_load_percent, gpu_temp_c) = self.get_gpu_metrics().await; // If no specific quotas are set, use system memory as reference @@ -1039,6 +1041,7 @@ impl Collector for ServiceCollector { "cpu_status": cpu_status, "cpu_cstate": cpu_cstate_info, "cpu_temp_c": cpu_temp_c, + "cpu_temp_status": cpu_temp_status, "gpu_load_percent": gpu_load_percent, "gpu_temp_c": gpu_temp_c, }, @@ -1049,7 +1052,6 @@ impl Collector for ServiceCollector { Ok(CollectorOutput { agent_type: AgentType::Service, data: service_metrics, - timestamp: Utc::now(), }) } } diff --git a/agent/src/collectors/smart.rs b/agent/src/collectors/smart.rs index 3db0dfb..4f38e5c 100644 --- a/agent/src/collectors/smart.rs +++ b/agent/src/collectors/smart.rs @@ -12,16 +12,14 @@ use super::{AgentType, Collector, CollectorError, CollectorOutput}; #[derive(Debug, Clone)] pub struct SmartCollector { - pub enabled: bool, pub interval: Duration, pub devices: Vec, pub timeout_ms: u64, } impl SmartCollector { - pub fn new(enabled: bool, interval_ms: u64, devices: Vec) -> Self { + pub fn new(_enabled: bool, interval_ms: u64, devices: Vec) -> Self { Self { - enabled, interval: Duration::from_millis(interval_ms), devices, timeout_ms: 30000, // 30 second timeout for smartctl @@ -274,13 +272,6 @@ impl Collector for SmartCollector { self.interval } - fn is_enabled(&self) -> bool { - self.enabled - } - - fn requires_root(&self) -> bool { - true // smartctl typically requires root access - } async fn collect(&self) -> Result { let mut drives = Vec::new(); @@ -327,11 +318,11 @@ impl Collector for SmartCollector { let disk_usage = self.get_disk_usage().await?; let status = if critical > 0 { - "CRITICAL" + "critical" } else if warning > 0 { - "WARNING" + "warning" } else { - "HEALTHY" + "ok" }; let smart_metrics = json!({ @@ -352,7 +343,6 @@ impl Collector for SmartCollector { Ok(CollectorOutput { agent_type: AgentType::Smart, data: smart_metrics, - timestamp: Utc::now(), }) } } diff 
--git a/agent/src/notifications.rs b/agent/src/notifications.rs index f106232..7940346 100644 --- a/agent/src/notifications.rs +++ b/agent/src/notifications.rs @@ -1,5 +1,6 @@ use std::collections::HashMap; use chrono::{DateTime, Utc}; +use chrono_tz::Europe::Stockholm; use lettre::{Message, SmtpTransport, Transport}; use serde::{Deserialize, Serialize}; use tracing::{info, error, warn}; @@ -81,15 +82,21 @@ impl NotificationManager { fn should_notify(&mut self, change: &StatusChange) -> bool { if !self.config.enabled { + info!("Notifications disabled, skipping {}.{}", change.component, change.metric); return false; } // Only notify on transitions to warning/critical, or recovery to ok - match (change.old_status.as_str(), change.new_status.as_str()) { + let should_send = match (change.old_status.as_str(), change.new_status.as_str()) { (_, "warning") | (_, "critical") => true, ("warning" | "critical", "ok") => true, _ => false, - } + }; + + info!("Status change {}.{}: {} -> {} (notify: {})", + change.component, change.metric, change.old_status, change.new_status, should_send); + + should_send } fn is_rate_limited(&mut self, change: &StatusChange) -> bool { @@ -98,11 +105,14 @@ impl NotificationManager { if let Some(last_time) = self.last_notification.get(&key) { let minutes_since = Utc::now().signed_duration_since(*last_time).num_minutes(); if minutes_since < self.config.rate_limit_minutes as i64 { + info!("Rate limiting {}.{}: {} minutes since last notification (limit: {})", + change.component, change.metric, minutes_since, self.config.rate_limit_minutes); return true; } } - self.last_notification.insert(key, Utc::now()); + self.last_notification.insert(key.clone(), Utc::now()); + info!("Not rate limited {}.{}, sending notification", change.component, change.metric); false } @@ -161,8 +171,8 @@ impl NotificationManager { change.metric, change.old_status, change.new_status, - change.timestamp.format("%Y-%m-%d %H:%M:%S UTC"), - Utc::now().format("%Y-%m-%d %H:%M:%S 
UTC") + change.timestamp.with_timezone(&Stockholm).format("%Y-%m-%d %H:%M:%S CET/CEST"), + Utc::now().with_timezone(&Stockholm).format("%Y-%m-%d %H:%M:%S CET/CEST") ) } diff --git a/agent/src/simple_agent.rs b/agent/src/simple_agent.rs index b19ec11..3557d38 100644 --- a/agent/src/simple_agent.rs +++ b/agent/src/simple_agent.rs @@ -41,7 +41,7 @@ impl SimpleAgent { smtp_port: 25, from_email: format!("{}@cmtec.se", hostname), to_email: "cm@cmtec.se".to_string(), - rate_limit_minutes: 30, + rate_limit_minutes: 0, // Disabled for testing }; let notification_manager = NotificationManager::new(notification_config.clone()); info!("Notifications: {} -> {}", notification_config.from_email, notification_config.to_email); @@ -164,6 +164,7 @@ impl SimpleAgent { // Check CPU status if let Some(cpu_status) = summary.get("cpu_status").and_then(|v| v.as_str()) { if let Some(change) = self.notification_manager.update_status("system", "cpu", cpu_status) { + info!("CPU status change detected: {} -> {}", change.old_status, change.new_status); self.notification_manager.send_notification(change).await; } } @@ -175,6 +176,14 @@ impl SimpleAgent { } } + // Check CPU temperature status + if let Some(cpu_temp_status) = summary.get("cpu_temp_status").and_then(|v| v.as_str()) { + if let Some(change) = self.notification_manager.update_status("system", "cpu_temp", cpu_temp_status) { + info!("CPU temp status change detected: {} -> {}", change.old_status, change.new_status); + self.notification_manager.send_notification(change).await; + } + } + // Check services status if let Some(services_status) = summary.get("services_status").and_then(|v| v.as_str()) { if let Some(change) = self.notification_manager.update_status("system", "services", services_status) { diff --git a/dashboard/src/app.rs b/dashboard/src/app.rs index dfe493e..0a5c5a3 100644 --- a/dashboard/src/app.rs +++ b/dashboard/src/app.rs @@ -259,7 +259,7 @@ impl App { if service_metrics.timestamp != timestamp { service_metrics.timestamp = 
timestamp; } - let mut snapshot = service_metrics.clone(); + let snapshot = service_metrics.clone(); // No more need for dashboard-side description caching since agent handles it diff --git a/dashboard/src/data/metrics.rs b/dashboard/src/data/metrics.rs index 3ca3eb3..16da7c5 100644 --- a/dashboard/src/data/metrics.rs +++ b/dashboard/src/data/metrics.rs @@ -71,6 +71,8 @@ pub struct ServiceSummary { #[serde(default)] pub cpu_temp_c: Option, #[serde(default)] + pub cpu_temp_status: Option, + #[serde(default)] pub gpu_load_percent: Option, #[serde(default)] pub gpu_temp_c: Option, @@ -100,7 +102,7 @@ pub enum ServiceStatus { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct BackupMetrics { - pub overall_status: BackupStatus, + pub overall_status: String, pub backup: BackupInfo, pub service: BackupServiceInfo, #[serde(default)] diff --git a/dashboard/src/ui/alerts.rs b/dashboard/src/ui/alerts.rs index f721152..c80ec51 100644 --- a/dashboard/src/ui/alerts.rs +++ b/dashboard/src/ui/alerts.rs @@ -1,6 +1,5 @@ use chrono::{DateTime, Utc}; -use ratatui::layout::{Constraint, Rect}; -use ratatui::style::Color; +use ratatui::layout::Rect; use ratatui::Frame; use crate::app::HostDisplayData; @@ -8,17 +7,7 @@ use crate::ui::system::{evaluate_performance, PerfSeverity}; use crate::ui::widget::{render_widget_data, WidgetData, WidgetStatus, StatusLevel}; pub fn render(frame: &mut Frame, hosts: &[HostDisplayData], area: Rect) { - let (severity, ok_count, warn_count, fail_count) = classify_hosts(hosts); - let mut color = match severity { - AlertSeverity::Critical => Color::Red, - AlertSeverity::Warning => Color::Yellow, - AlertSeverity::Healthy => Color::Green, - AlertSeverity::Unknown => Color::Gray, - }; - - if hosts.is_empty() { - color = Color::Gray; - } + let (severity, _ok_count, _warn_count, _fail_count) = classify_hosts(hosts); let title = "Alerts".to_string(); @@ -140,9 +129,9 @@ fn host_severity(host: &HostDisplayData) -> AlertSeverity { } if let Some(backup) = 
host.backup.as_ref() { - match backup.overall_status { - crate::data::metrics::BackupStatus::Failed => return AlertSeverity::Critical, - crate::data::metrics::BackupStatus::Warning => return AlertSeverity::Warning, + match backup.overall_status.as_str() { + "critical" => return AlertSeverity::Critical, + "warning" => return AlertSeverity::Warning, _ => {} } } @@ -211,15 +200,15 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) { } if let Some(backup) = host.backup.as_ref() { - match backup.overall_status { - crate::data::metrics::BackupStatus::Failed => { + match backup.overall_status.as_str() { + "critical" => { return ( "critical: backup failed".to_string(), AlertSeverity::Critical, true, ); } - crate::data::metrics::BackupStatus::Warning => { + "warning" => { return ( "warning: backup warning".to_string(), AlertSeverity::Warning, @@ -243,14 +232,6 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) { ("ok".to_string(), AlertSeverity::Healthy, false) } -fn severity_color(severity: AlertSeverity) -> Color { - match severity { - AlertSeverity::Critical => Color::Red, - AlertSeverity::Warning => Color::Yellow, - AlertSeverity::Healthy => Color::Green, - AlertSeverity::Unknown => Color::Gray, - } -} fn latest_timestamp(host: &HostDisplayData) -> Option> { let mut latest = host.last_success; @@ -279,11 +260,3 @@ fn latest_timestamp(host: &HostDisplayData) -> Option> { latest } -fn severity_symbol(severity: AlertSeverity) -> &'static str { - match severity { - AlertSeverity::Critical => "✖", - AlertSeverity::Warning => "!", - AlertSeverity::Healthy => "✔", - AlertSeverity::Unknown => "?", - } -} diff --git a/dashboard/src/ui/backup.rs b/dashboard/src/ui/backup.rs index e317034..23c8702 100644 --- a/dashboard/src/ui/backup.rs +++ b/dashboard/src/ui/backup.rs @@ -1,10 +1,9 @@ use ratatui::layout::Rect; -use ratatui::style::Color; use ratatui::Frame; use crate::app::HostDisplayData; -use crate::data::metrics::{BackupMetrics, 
BackupStatus}; -use crate::ui::widget::{render_placeholder, render_widget_data, WidgetData, WidgetStatus, StatusLevel}; +use crate::data::metrics::BackupMetrics; +use crate::ui::widget::{render_placeholder, render_widget_data, status_level_from_agent_status, WidgetData, WidgetStatus, StatusLevel}; pub fn render(frame: &mut Frame, host: Option<&HostDisplayData>, area: Rect) { match host { @@ -25,12 +24,7 @@ pub fn render(frame: &mut Frame, host: Option<&HostDisplayData>, area: Rect) { } fn render_metrics(frame: &mut Frame, _host: &HostDisplayData, metrics: &BackupMetrics, area: Rect) { - let widget_status = match metrics.overall_status { - BackupStatus::Failed => StatusLevel::Error, - BackupStatus::Warning => StatusLevel::Warning, - BackupStatus::Unknown => StatusLevel::Unknown, - BackupStatus::Healthy => StatusLevel::Ok, - }; + let widget_status = status_level_from_agent_status(Some(&metrics.overall_status)); let mut data = WidgetData::new( "Backups", @@ -93,46 +87,4 @@ fn render_metrics(frame: &mut Frame, _host: &HostDisplayData, metrics: &BackupMe render_widget_data(frame, area, data); } -fn backup_status_color(status: &BackupStatus) -> Color { - match status { - BackupStatus::Failed => Color::Red, - BackupStatus::Warning => Color::Yellow, - BackupStatus::Unknown => Color::LightYellow, - BackupStatus::Healthy => Color::Green, - } -} -fn format_timestamp(timestamp: Option<&chrono::DateTime>) -> String { - timestamp - .map(|ts| ts.format("%Y-%m-%d %H:%M:%S").to_string()) - .unwrap_or_else(|| "—".to_string()) -} - -fn repo_status_level(metrics: &BackupMetrics) -> StatusLevel { - match metrics.overall_status { - BackupStatus::Failed => StatusLevel::Error, - BackupStatus::Warning => StatusLevel::Warning, - _ => { - if metrics.backup.snapshot_count > 0 { - StatusLevel::Ok - } else { - StatusLevel::Warning - } - } - } -} - -fn service_status_level(metrics: &BackupMetrics) -> StatusLevel { - match metrics.overall_status { - BackupStatus::Failed => StatusLevel::Error, - 
BackupStatus::Warning => StatusLevel::Warning, - BackupStatus::Unknown => StatusLevel::Unknown, - BackupStatus::Healthy => { - if metrics.service.enabled { - StatusLevel::Ok - } else { - StatusLevel::Warning - } - } - } -} diff --git a/dashboard/src/ui/services.rs b/dashboard/src/ui/services.rs index 5424205..8acc5ce 100644 --- a/dashboard/src/ui/services.rs +++ b/dashboard/src/ui/services.rs @@ -1,9 +1,8 @@ use ratatui::layout::Rect; -use ratatui::style::Color; use ratatui::Frame; use crate::app::HostDisplayData; -use crate::data::metrics::{ServiceStatus, ServiceSummary}; +use crate::data::metrics::ServiceStatus; use crate::ui::widget::{render_placeholder, render_widget_data, status_level_from_agent_status, WidgetData, WidgetStatus, StatusLevel}; pub fn render(frame: &mut Frame, host: Option<&HostDisplayData>, area: Rect) { @@ -31,7 +30,6 @@ fn render_metrics( area: Rect, ) { let summary = &metrics.summary; - let color = summary_color(summary); let title = "Services".to_string(); // Use agent-calculated services status @@ -105,24 +103,6 @@ fn status_weight(status: &ServiceStatus) -> i32 { } } -fn status_symbol(status: &ServiceStatus) -> (&'static str, Color) { - match status { - ServiceStatus::Running => ("✔", Color::Green), - ServiceStatus::Degraded => ("!", Color::Yellow), - ServiceStatus::Restarting => ("↻", Color::Yellow), - ServiceStatus::Stopped => ("✖", Color::Red), - } -} - -fn summary_color(summary: &ServiceSummary) -> Color { - if summary.failed > 0 { - Color::Red - } else if summary.degraded > 0 { - Color::Yellow - } else { - Color::Green - } -} fn format_memory_value(used: f32, quota: f32) -> String { let used_gb = used / 1000.0; diff --git a/dashboard/src/ui/storage.rs b/dashboard/src/ui/storage.rs index 9e0db63..7c9ed14 100644 --- a/dashboard/src/ui/storage.rs +++ b/dashboard/src/ui/storage.rs @@ -1,10 +1,9 @@ use ratatui::layout::Rect; -use ratatui::style::Color; use ratatui::Frame; use crate::app::HostDisplayData; use 
crate::data::metrics::SmartMetrics; -use crate::ui::widget::{render_placeholder, render_widget_data, WidgetData, WidgetStatus, StatusLevel}; +use crate::ui::widget::{render_placeholder, render_widget_data, status_level_from_agent_status, WidgetData, WidgetStatus, StatusLevel}; pub fn render(frame: &mut Frame, host: Option<&HostDisplayData>, area: Rect) { match host { @@ -25,16 +24,9 @@ pub fn render(frame: &mut Frame, host: Option<&HostDisplayData>, area: Rect) { } fn render_metrics(frame: &mut Frame, _host: &HostDisplayData, metrics: &SmartMetrics, area: Rect) { - let color = smart_status_color(&metrics.status); let title = "Storage".to_string(); - let widget_status = if metrics.summary.critical > 0 { - StatusLevel::Error - } else if metrics.summary.warning > 0 { - StatusLevel::Warning - } else { - StatusLevel::Ok - }; + let widget_status = status_level_from_agent_status(Some(&metrics.status)); let mut data = WidgetData::new( title, @@ -95,13 +87,6 @@ fn render_metrics(frame: &mut Frame, _host: &HostDisplayData, metrics: &SmartMet render_widget_data(frame, area, data); } -fn smart_status_color(status: &str) -> Color { - match status.to_uppercase().as_str() { - "CRITICAL" => Color::Red, - "WARNING" => Color::Yellow, - _ => Color::Green, - } -} fn format_temperature(value: f32) -> String { if value.abs() < f32::EPSILON { diff --git a/dashboard/src/ui/system.rs b/dashboard/src/ui/system.rs index b403a11..1b43bf4 100644 --- a/dashboard/src/ui/system.rs +++ b/dashboard/src/ui/system.rs @@ -1,12 +1,11 @@ use ratatui::layout::Rect; -use ratatui::style::Color; use ratatui::Frame; use crate::app::HostDisplayData; use crate::data::metrics::{ServiceMetrics, ServiceSummary}; use crate::ui::widget::{ - combined_color, render_placeholder, render_combined_widget_data, status_color_for_cpu_load, status_color_from_metric, - status_color_from_percentage, status_level_from_agent_status, WidgetDataSet, WidgetStatus, StatusLevel, + render_placeholder, render_combined_widget_data, + 
status_level_from_agent_status, WidgetDataSet, WidgetStatus, StatusLevel, }; pub fn render(frame: &mut Frame, host: Option<&HostDisplayData>, area: Rect) { @@ -44,33 +43,19 @@ fn render_metrics( } else { summary.memory_used_mb }; - let usage_ratio = if system_total > 0.0 { + let _usage_ratio = if system_total > 0.0 { (system_used / system_total) * 100.0 } else { 0.0 }; let (perf_severity, _reason) = evaluate_performance(summary); - let border_color = match perf_severity { - PerfSeverity::Critical => Color::Red, - PerfSeverity::Warning => Color::Yellow, - PerfSeverity::Ok => Color::Green, - }; + // Dashboard should NOT calculate border colors - agent is the source of truth // Use agent-calculated statuses instead of dashboard calculations let memory_status = status_level_from_agent_status(summary.memory_status.as_ref()); let cpu_status = status_level_from_agent_status(summary.cpu_status.as_ref()); - let cpu_temp_color = status_color_from_metric(summary.cpu_temp_c, 80.0, 90.0); - let gpu_load_color = summary - .gpu_load_percent - .map(|value| status_color_from_percentage(value, 85.0, 95.0)) - .unwrap_or(Color::Green); - let gpu_temp_color = summary - .gpu_temp_c - .map(|value| status_color_from_metric(Some(value), 75.0, 85.0)) - .unwrap_or(Color::Green); - - let gpu_icon_color = combined_color(&[gpu_load_color, gpu_temp_color]); + // Dashboard should NOT calculate colors - agent is the source of truth // Memory dataset - use agent-calculated status let mut memory_dataset = WidgetDataSet::new(vec!["Memory usage".to_string()], Some(WidgetStatus::new(memory_status))); @@ -156,7 +141,8 @@ fn render_metrics( } // GPU dataset - let gpu_status = status_level_from_color(gpu_icon_color); + // GPU status should come from agent when available + let gpu_status = StatusLevel::Unknown; // Default until agent provides gpu_status let mut gpu_dataset = WidgetDataSet::new(vec!["GPU load".to_string(), "GPU temp".to_string()], Some(WidgetStatus::new(gpu_status))); gpu_dataset.add_row( 
Some(WidgetStatus::new(gpu_status)), @@ -206,13 +192,6 @@ fn format_optional_percent(value: Option) -> String { } } -fn status_level_from_color(color: Color) -> StatusLevel { - match color { - Color::Red => StatusLevel::Error, - Color::Yellow => StatusLevel::Warning, - _ => StatusLevel::Ok, - } -} pub(crate) fn evaluate_performance(summary: &ServiceSummary) -> (PerfSeverity, Option) { let mem_percent = if summary.system_memory_total_mb > 0.0 { @@ -233,43 +212,38 @@ pub(crate) fn evaluate_performance(summary: &ServiceSummary) -> (PerfSeverity, O } }; - if mem_percent >= 95.0 { - consider(PerfSeverity::Critical, format!("RAM {:.0}%", mem_percent)); - } else if mem_percent >= 80.0 { - consider(PerfSeverity::Warning, format!("RAM {:.0}%", mem_percent)); - } - - let load = summary.cpu_load_5; - if load >= 4.0 { - consider(PerfSeverity::Critical, format!("CPU load {:.2}", load)); - } else if load >= 2.0 { - consider(PerfSeverity::Warning, format!("CPU load {:.2}", load)); - } - - if let Some(temp) = summary.cpu_temp_c { - if temp >= 90.0 { - consider(PerfSeverity::Critical, format!("CPU temp {:.0}°C", temp)); - } else if temp >= 80.0 { - consider(PerfSeverity::Warning, format!("CPU temp {:.0}°C", temp)); + // Use agent's memory status instead of hardcoded thresholds + if let Some(memory_status) = &summary.memory_status { + match memory_status.as_str() { + "critical" => consider(PerfSeverity::Critical, format!("RAM {:.0}%", mem_percent)), + "warning" => consider(PerfSeverity::Warning, format!("RAM {:.0}%", mem_percent)), + _ => {} // "ok" - no alert needed } } - if let Some(load) = summary.gpu_load_percent { - if load >= 95.0 { - consider(PerfSeverity::Critical, format!("GPU load {:.0}%", load)); - } else if load >= 85.0 { - consider(PerfSeverity::Warning, format!("GPU load {:.0}%", load)); + // Use agent's CPU status instead of hardcoded thresholds + if let Some(cpu_status) = &summary.cpu_status { + match cpu_status.as_str() { + "critical" => 
consider(PerfSeverity::Critical, format!("CPU load {:.2}", summary.cpu_load_5)), + "warning" => consider(PerfSeverity::Warning, format!("CPU load {:.2}", summary.cpu_load_5)), + _ => {} // "ok" - no alert needed } } - if let Some(temp) = summary.gpu_temp_c { - if temp >= 85.0 { - consider(PerfSeverity::Critical, format!("GPU temp {:.0}°C", temp)); - } else if temp >= 75.0 { - consider(PerfSeverity::Warning, format!("GPU temp {:.0}°C", temp)); + // Use agent's CPU temperature status instead of hardcoded thresholds + if let Some(cpu_temp_status) = &summary.cpu_temp_status { + if let Some(temp) = summary.cpu_temp_c { + match cpu_temp_status.as_str() { + "critical" => consider(PerfSeverity::Critical, format!("CPU temp {:.0}°C", temp)), + "warning" => consider(PerfSeverity::Warning, format!("CPU temp {:.0}°C", temp)), + _ => {} // "ok" - no alert needed + } } } + // TODO: GPU status should come from agent, not calculated here with hardcoded thresholds + // For now, remove hardcoded GPU thresholds until agent provides gpu_status + if severity == PerfSeverity::Ok { (PerfSeverity::Ok, None) } else { diff --git a/dashboard/src/ui/widget.rs b/dashboard/src/ui/widget.rs index 61b65cf..eefed32 100644 --- a/dashboard/src/ui/widget.rs +++ b/dashboard/src/ui/widget.rs @@ -24,33 +24,8 @@ fn neutral_border_style(color: Color) -> Style { Style::default().fg(color) } -pub fn status_color_from_percentage(value: f32, warn: f32, crit: f32) -> Color { - if value >= crit { - Color::Red - } else if value >= warn { - Color::Yellow - } else { - Color::Green - } -} -pub fn status_color_from_metric(value: Option<f32>, warn: f32, crit: f32) -> Color { - match value { - Some(v) if v >= crit => Color::Red, - Some(v) if v >= warn => Color::Yellow, - _ => Color::Green, - } -} -pub fn status_color_for_cpu_load(load: f32) -> Color { - if load >= 8.0 { - Color::Red - } else if load >= 5.0 { - Color::Yellow - } else { - Color::Green - } -} pub fn status_level_from_agent_status(agent_status: Option<&String>) 
-> StatusLevel { match agent_status.map(|s| s.as_str()) { @@ -62,15 +37,6 @@ pub fn status_level_from_agent_status(agent_status: Option<&String>) -> StatusLe } } -pub fn combined_color(colors: &[Color]) -> Color { - if colors.iter().any(|&c| c == Color::Red) { - Color::Red - } else if colors.iter().any(|&c| c == Color::Yellow) { - Color::Yellow - } else { - Color::Green - } -} pub fn render_placeholder(frame: &mut Frame, area: Rect, title: &str, message: &str) {