Update version to 0.1.22 and fix system metric status calculation
All checks were successful
Build and Release / build-and-release (push) Successful in 1m11s
- Fix /tmp usage status to use proper thresholds instead of hardcoded Ok status
- Fix wear level status to use configurable thresholds instead of hardcoded values
- Add dedicated tmp_status field to SystemWidget for proper /tmp status display
- Remove host-level hourglass icon during service operations
- Implement immediate service status updates after start/stop/restart commands
- Remove active users display and collection from NixOS section
- Fix immediate host status aggregation transmission to dashboard
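The dashboard-side half of the tmp_status bullet is not part of the excerpted diff below; a minimal sketch of what the dedicated field might look like (the real SystemWidget layout is an assumption):

    pub struct SystemWidget {
        // ...existing display fields...
        /// Dedicated status for the /tmp gauge, so the widget no longer
        /// reuses the overall memory status (hypothetical field placement).
        pub tmp_status: Status,
    }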
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-agent"
-version = "0.1.21"
+version = "0.1.22"
 edition = "2021"
 
 [dependencies]
@@ -270,7 +270,7 @@ impl Agent {
     }
 
     /// Handle systemd service control commands
-    async fn handle_service_control(&self, service_name: &str, action: &ServiceAction) -> Result<()> {
+    async fn handle_service_control(&mut self, service_name: &str, action: &ServiceAction) -> Result<()> {
         let action_str = match action {
             ServiceAction::Start => "start",
             ServiceAction::Stop => "stop",
@@ -300,9 +300,12 @@ impl Agent {
 
         // Force refresh metrics after service control to update service status
         if matches!(action, ServiceAction::Start | ServiceAction::Stop | ServiceAction::Restart) {
-            info!("Triggering metric refresh after service control");
-            // Note: We can't call self.collect_metrics_only() here due to borrowing issues
-            // The next metric collection cycle will pick up the changes
+            info!("Triggering immediate metric refresh after service control");
+            if let Err(e) = self.collect_metrics_only().await {
+                error!("Failed to refresh metrics after service control: {}", e);
+            } else {
+                info!("Service status refreshed immediately after {} {}", action_str, service_name);
+            }
         }
 
         Ok(())
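The removed comment blamed borrowing issues; the signature change in the previous hunk (&self to &mut self) is what makes this call legal, since collect_metrics_only presumably needs exclusive access to the agent's state. A sketch of the assumed shape:

    impl Agent {
        /// Assumed signature: collecting metrics mutates collector and
        /// cache state, so it takes &mut self, and so must any caller.
        async fn collect_metrics_only(&mut self) -> Result<()> {
            // ...run collectors, update cached statuses, transmit...
            Ok(())
        }
    }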
@@ -556,8 +556,8 @@ impl Collector for DiskCollector {
 
             // Drive wear level (for SSDs)
             if let Some(wear) = drive.wear_level {
-                let wear_status = if wear >= 90.0 { Status::Critical }
-                else if wear >= 80.0 { Status::Warning }
+                let wear_status = if wear >= self.config.wear_critical_percent { Status::Critical }
+                else if wear >= self.config.wear_warning_percent { Status::Warning }
                 else { Status::Ok };
 
                 metrics.push(Metric {
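The new wear check reads two threshold fields from the collector config; a plausible shape for that config, keeping the previously hardcoded values as defaults (field names come from the call sites, everything else is assumed):

    #[derive(Debug, Clone)]
    pub struct DiskConfig {
        /// Wear level (%) at or above which a drive is reported Critical.
        pub wear_critical_percent: f64, // previously hardcoded 90.0
        /// Wear level (%) at or above which a drive is reported Warning.
        pub wear_warning_percent: f64,  // previously hardcoded 80.0
    }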
@@ -187,7 +187,7 @@ impl MemoryCollector {
         }
 
         // Monitor tmpfs (/tmp) usage
-        if let Ok(tmpfs_metrics) = self.get_tmpfs_metrics() {
+        if let Ok(tmpfs_metrics) = self.get_tmpfs_metrics(status_tracker) {
             metrics.extend(tmpfs_metrics);
         }
 
@@ -195,7 +195,7 @@ impl MemoryCollector {
     }
 
     /// Get tmpfs (/tmp) usage metrics
-    fn get_tmpfs_metrics(&self) -> Result<Vec<Metric>, CollectorError> {
+    fn get_tmpfs_metrics(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
        use std::process::Command;
 
        let output = Command::new("df")
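The df invocation is cut off by the hunk boundary; one way a helper like this can read /tmp usage with GNU df (the flags and error mapping here are illustrative, not necessarily what the project uses):

    let output = Command::new("df")
        .args(["--output=pcent", "/tmp"]) // GNU coreutils: print only Use%
        .output()
        .map_err(|e| CollectorError::CommandFailed(e.to_string()))?; // error variant assumed
    let text = String::from_utf8_lossy(&output.stdout);
    let usage_percent: f64 = text
        .lines()
        .nth(1) // skip the "Use%" header row
        .and_then(|l| l.trim().trim_end_matches('%').parse().ok())
        .unwrap_or(0.0);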
@@ -249,12 +249,15 @@ impl MemoryCollector {
         let mut metrics = Vec::new();
         let timestamp = chrono::Utc::now().timestamp() as u64;
 
+        // Calculate status using same thresholds as main memory
+        let tmp_status = self.calculate_usage_status("memory_tmp_usage_percent", usage_percent, status_tracker);
+
         metrics.push(Metric {
             name: "memory_tmp_usage_percent".to_string(),
             value: MetricValue::Float(usage_percent),
             unit: Some("%".to_string()),
             description: Some("tmpfs /tmp usage percentage".to_string()),
-            status: Status::Ok,
+            status: tmp_status,
             timestamp,
         });
 
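calculate_usage_status itself is outside this diff; a minimal sketch of what such a helper might do, with the signature taken from the call site and the body, config fields, and StatusTracker method all assumed:

    fn calculate_usage_status(
        &self,
        metric_name: &str,
        usage_percent: f64,
        status_tracker: &mut StatusTracker,
    ) -> Status {
        let status = if usage_percent >= self.config.usage_critical_percent {
            Status::Critical
        } else if usage_percent >= self.config.usage_warning_percent {
            Status::Warning
        } else {
            Status::Ok
        };
        // A tracker like this can debounce flapping between states;
        // record() is a hypothetical method name.
        status_tracker.record(metric_name, status);
        status
    }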
@@ -10,7 +10,6 @@ use crate::config::NixOSConfig;
 ///
 /// Collects NixOS-specific system information including:
 /// - NixOS version and build information
-/// - Currently active/logged in users
 pub struct NixOSCollector {
 }
 
@@ -65,27 +64,6 @@ impl NixOSCollector {
         Err("Could not extract hash from nix store path".into())
     }
 
-    /// Get currently active users
-    fn get_active_users(&self) -> Result<Vec<String>, Box<dyn std::error::Error>> {
-        let output = Command::new("who").output()?;
-
-        if !output.status.success() {
-            return Err("who command failed".into());
-        }
-
-        let who_output = String::from_utf8_lossy(&output.stdout);
-        let mut users = std::collections::HashSet::new();
-
-        for line in who_output.lines() {
-            if let Some(username) = line.split_whitespace().next() {
-                if !username.is_empty() {
-                    users.insert(username.to_string());
-                }
-            }
-        }
-
-        Ok(users.into_iter().collect())
-    }
 }
 
 #[async_trait]
@@ -121,31 +99,6 @@ impl Collector for NixOSCollector {
             }
         }
 
-        // Collect active users
-        match self.get_active_users() {
-            Ok(users) => {
-                let users_str = users.join(", ");
-                metrics.push(Metric {
-                    name: "system_active_users".to_string(),
-                    value: MetricValue::String(users_str),
-                    unit: None,
-                    description: Some("Currently active users".to_string()),
-                    status: Status::Ok,
-                    timestamp,
-                });
-            }
-            Err(e) => {
-                debug!("Failed to get active users: {}", e);
-                metrics.push(Metric {
-                    name: "system_active_users".to_string(),
-                    value: MetricValue::String("unknown".to_string()),
-                    unit: None,
-                    description: Some("Active users (failed to detect)".to_string()),
-                    status: Status::Unknown,
-                    timestamp,
-                });
-            }
-        }
-
         // Collect config hash
         match self.get_config_hash() {
@@ -160,27 +160,37 @@ impl HostStatusManager {
 
     /// Process a metric - updates status and queues for aggregated notifications if status changed
     pub async fn process_metric(&mut self, metric: &Metric, _notification_manager: &mut crate::notifications::NotificationManager) -> bool {
-        let old_status = self.service_statuses.get(&metric.name).copied();
-        let new_status = metric.status;
+        let old_service_status = self.service_statuses.get(&metric.name).copied();
+        let old_host_status = self.current_host_status;
+        let new_service_status = metric.status;
 
-        // Update status
-        self.update_service_status(metric.name.clone(), new_status);
+        // Update status (this recalculates host status internally)
+        self.update_service_status(metric.name.clone(), new_service_status);
 
-        // Check if status actually changed (ignore first-time status setting)
-        if let Some(old_status) = old_status {
-            if old_status != new_status {
-                debug!("Status change detected for {}: {:?} -> {:?}", metric.name, old_status, new_status);
+        let new_host_status = self.current_host_status;
+        let mut status_changed = false;
+
+        // Check if service status actually changed (ignore first-time status setting)
+        if let Some(old_service_status) = old_service_status {
+            if old_service_status != new_service_status {
+                debug!("Service status change detected for {}: {:?} -> {:?}", metric.name, old_service_status, new_service_status);
 
                 // Queue change for aggregated notification (not immediate)
-                self.queue_status_change(&metric.name, old_status, new_status);
+                self.queue_status_change(&metric.name, old_service_status, new_service_status);
 
-                return true; // Status changed - caller should trigger immediate transmission
+                status_changed = true;
             }
         } else {
-            debug!("Initial status set for {}: {:?}", metric.name, new_status);
+            debug!("Initial status set for {}: {:?}", metric.name, new_service_status);
         }
 
-        false // No status change (or first-time status)
+        // Check if host status changed (this should trigger immediate transmission)
+        if old_host_status != new_host_status {
+            debug!("Host status change detected: {:?} -> {:?}", old_host_status, new_host_status);
+            status_changed = true;
+        }
+
+        status_changed // Return true if either service or host status changed
    }
 
     /// Queue status change for aggregated notification
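update_service_status is called above but not shown; its new comment says it recalculates host status internally, which suggests a worst-of aggregation over all tracked services. A sketch under that assumption (the Ord ordering on Status is assumed):

    fn update_service_status(&mut self, name: String, status: Status) {
        self.service_statuses.insert(name, status);
        // Host status = worst status of any service; assumes Status
        // derives Ord with Ok < Warning < Critical.
        self.current_host_status = self
            .service_statuses
            .values()
            .copied()
            .max()
            .unwrap_or(Status::Ok);
    }

With that shape, a single service flipping to Critical flips the host status in the same call, which is exactly what the hunk's old_host_status != new_host_status check relies on for immediate transmission.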