Compare commits


29 Commits

Author SHA1 Message Date
0b1d8c0a73 Fix Data_3 showing as unknown by handling smartctl warning exit codes
All checks were successful
Build and Release / build-and-release (push) Successful in 1m11s
Root cause: sda's temperature exceeded its threshold in the past, causing
smartctl to return exit code 32 (warning: "Attributes have been <= threshold
in the past"). The agent checked output.status.success() and rejected the
entire output as failed, even though the data (serial, temperature, health)
was perfectly valid.

Smartctl exit codes are bit flags for informational warnings:
- Exit 0: No warnings
- Exit 32 (bit 5): Attributes were at/below threshold in past
- Exit 64 (bit 6): Error log has entries
- etc.

The output data is valid regardless of these warning flags.
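
For reference, a minimal sketch of how those warning bits could be decoded from the exit status (std::process::Command here; the constant names are illustrative, not part of the agent):

```rust
use std::process::Command;

// Illustrative names for smartctl's informational exit bits (bit 5 = 32, bit 6 = 64).
const ATTR_BELOW_THRESHOLD_IN_PAST: i32 = 1 << 5;
const ERROR_LOG_HAS_ENTRIES: i32 = 1 << 6;

fn main() -> std::io::Result<()> {
    let output = Command::new("smartctl").args(["-a", "/dev/sda"]).output()?;
    // Exit 0 means "no warnings"; non-zero codes are OR-ed warning bits, not failure.
    let code = output.status.code().unwrap_or(0);
    if code & ATTR_BELOW_THRESHOLD_IN_PAST != 0 {
        eprintln!("note: attributes were at/below threshold in the past");
    }
    if code & ERROR_LOG_HAS_ENTRIES != 0 {
        eprintln!("note: SMART error log has entries");
    }
    // The stdout payload is valid SMART data either way.
    println!("{}", String::from_utf8_lossy(&output.stdout));
    Ok(())
}
```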

Solution: Parse the output whenever it is non-empty and ignore the exit code.
Return UNKNOWN only if the output is actually empty (the command truly failed).

Result: Data_3 will now show "ZDZ4VE0B T: 31°C" instead of "? Data_3: sda"
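
A condensed sketch of the described check, with a simplified stand-in for the collector's SmartData; only an empty output is treated as failure, mirroring the diff further down:

```rust
// Simplified stand-in for the real SmartData struct.
struct SmartData {
    health: String,
    serial_number: Option<String>,
    temperature_celsius: Option<f32>,
}

fn parse_smartctl(stdout: &[u8]) -> SmartData {
    let output_str = String::from_utf8_lossy(stdout);
    // Warning exit codes (e.g. 32) still come with valid data, so the exit status
    // is ignored; only an empty output means the command truly failed.
    if output_str.trim().is_empty() {
        return SmartData {
            health: "UNKNOWN".to_string(),
            serial_number: None,
            temperature_celsius: None,
        };
    }
    // Actual parsing of serial, temperature and health is omitted in this sketch.
    SmartData {
        health: "PASSED".to_string(),
        serial_number: None,
        temperature_celsius: None,
    }
}
```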

Bump version to v0.1.225
2025-11-30 00:35:19 +01:00
c77aa6eaaa Fix Data_3 timeout by removing sequential SMART during pool detection
All checks were successful
Build and Release / build-and-release (push) Successful in 1m34s
Root cause: SMART data was collected TWICE:
1. Sequential collection during pool detection in get_drive_info_for_path()
   using problematic tokio::task::block_in_place() nesting
2. Parallel collection in get_smart_data_for_drives() (v0.1.223)

The sequential collection happened FIRST during pool detection, causing
sda (Data_3) to time out due to:
- Bad async nesting: block_in_place() wrapping block_on()
- Sequential execution causing runtime issues
- sda being third in the sequence, by which point the runtime had degraded

Solution: Remove SMART collection from get_drive_info_for_path().
Pool drive temperatures are populated later from the parallel SMART
collection which properly uses futures::join_all.

Benefits:
- Eliminates problematic async nesting
- All SMART queries happen once in parallel only
- sda/Data_3 should now show serial (ZDZ4VE0B) and temperature
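
A sketch of the resulting shape, using simplified stand-ins for PoolDrive and SmartData; the merge helper is hypothetical and only illustrates where temperatures come from now:

```rust
use std::collections::HashMap;

// Simplified stand-ins for the real structs.
struct PoolDrive {
    name: String,
    mount_point: String,
    temperature_celsius: Option<f32>,
}
struct SmartData {
    temperature_celsius: Option<f32>,
}

// Pool detection only resolves the backing device; no block_in_place()/block_on() here.
fn drive_info_for_path(base_device: &str, path: &str) -> PoolDrive {
    PoolDrive {
        name: base_device.to_string(),
        mount_point: path.to_string(),
        temperature_celsius: None, // filled in later from the parallel SMART pass
    }
}

// After the single parallel SMART collection, temperatures are merged by device name.
fn fill_temperatures(drives: &mut [PoolDrive], smart: &HashMap<String, SmartData>) {
    for drive in drives {
        if let Some(data) = smart.get(&drive.name) {
            drive.temperature_celsius = data.temperature_celsius;
        }
    }
}
```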

Bump version to v0.1.224
2025-11-30 00:14:25 +01:00
8a0e68f0e3 Fix Data_3 timeout by parallelizing SMART collection
All checks were successful
Build and Release / build-and-release (push) Successful in 1m10s
Root cause: SMART data was collected sequentially, one drive at a time.
With 5 drives taking ~500ms each, total collection time was 2.5+ seconds.
With the disk collector running every second, this caused overlapping
collections and resource contention. The last drive (sda/Data_3)
would time out because it was still being accessed by the previous collection.

Solution: Query all drives in parallel using futures::join_all. Now all
drives get their SMART data collected simultaneously with independent
3-second timeouts, eliminating contention and reducing total collection
time from 2.5+ seconds to ~500ms (the slowest single drive).
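
A minimal sketch of that pattern, with query_drive standing in for the real get_smart_data(); every drive gets its own independent 3-second timeout and all queries run concurrently:

```rust
use std::collections::HashMap;
use std::time::Duration;
use futures::future::join_all;

// Stand-in for the real per-drive SMART query.
async fn query_drive(drive: &str) -> Result<f32, ()> {
    let _ = drive; // ...run smartctl for /dev/<drive> and parse the temperature...
    Ok(31.0)
}

async fn collect_all(drives: &[String]) -> HashMap<String, f32> {
    // One future per drive, each wrapped in its own 3-second timeout.
    let futures: Vec<_> = drives
        .iter()
        .map(|drive| async move {
            let result = tokio::time::timeout(Duration::from_secs(3), query_drive(drive)).await;
            (drive.clone(), result)
        })
        .collect();

    // All drives are queried concurrently, so total time is roughly the slowest drive.
    let mut out = HashMap::new();
    for (drive, result) in join_all(futures).await {
        if let Ok(Ok(temperature)) = result {
            out.insert(drive, temperature);
        }
    }
    out
}
```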

Benefits:
- All drives complete in ~500ms instead of 2.5+ seconds
- No overlapping collections causing resource contention
- Each drive gets full 3-second timeout window
- sda/Data_3 should now show temperature and serial number

Bump version to v0.1.223
2025-11-29 23:51:43 +01:00
2d653fe9ae Fix empty Storage section by configuring stdio pipes
All checks were successful
Build and Release / build-and-release (push) Successful in 1m15s
Root cause: run_command_with_timeout() was calling cmd.spawn() without
configuring stdout/stderr pipes. This caused command output to go to
journald instead of being captured by wait_with_output(). The disk
collector received empty output and failed silently.

Solution: Configure stdout(Stdio::piped()) and stderr(Stdio::piped())
before spawning commands. This ensures wait_with_output() can properly
capture command output.
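
The essential part of the fix, sketched around tokio::process::Command; without the two Stdio::piped() calls, wait_with_output() has nothing to capture:

```rust
use std::process::Stdio;
use tokio::process::Command;

async fn run_lsblk() -> std::io::Result<std::process::Output> {
    let mut cmd = Command::new("lsblk");
    cmd.args(["-rn", "-o", "NAME,MOUNTPOINT"]);
    // Without these, the child's stdout/stderr inherit the service's handles and
    // end up in journald; wait_with_output() then sees nothing.
    cmd.stdout(Stdio::piped());
    cmd.stderr(Stdio::piped());
    let child = cmd.spawn()?;
    child.wait_with_output().await
}
```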

Fixes: Empty Storage section, lsblk output appearing in journald
Bump version to v0.1.222
2025-11-29 23:25:17 +01:00
caba78004e Fix empty Storage section by properly aliasing command types
All checks were successful
Build and Release / build-and-release (push) Successful in 2m6s
v0.1.220 broke the disk collector by changing the import from
std::process::Command to tokio::process::Command, but lines 193 and
767 still explicitly used std::process::Command::new(), which silently failed.

Solution: Import both as aliases (TokioCommand/StdCommand) and use the
appropriate type for each operation: async commands use TokioCommand
with run_command_with_timeout, sync commands use StdCommand with a
system timeout wrapper.
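
A sketch of the aliasing, with the lsblk/df call sites as illustrative examples of the two paths:

```rust
use std::process::Command as StdCommand;
use tokio::process::Command as TokioCommand;

// Async path: handed to run_command_with_timeout, which awaits the child.
fn lsblk_cmd() -> TokioCommand {
    let mut cmd = TokioCommand::new("lsblk");
    cmd.args(["-rn", "-o", "NAME,MOUNTPOINT"]);
    cmd
}

// Sync path: wrapped in the external `timeout` binary instead.
fn df_sync(mount_point: &str) -> std::io::Result<std::process::Output> {
    StdCommand::new("timeout")
        .args(["2", "df", "--block-size=1", mount_point])
        .output()
}
```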

Fixes: Empty Storage section after v0.1.220 deployment
Bump version to v0.1.221
2025-11-29 21:29:33 +01:00
77bf08a978 Fix blocking smartctl commands with proper async/timeout handling
All checks were successful
Build and Release / build-and-release (push) Successful in 2m2s
- Changed disk collector to use tokio::process::Command instead of std::process::Command
- Updated run_command_with_timeout to properly kill processes on timeout
- Fixes issue where smartctl hanging on a problematic drive (/dev/sda) would freeze the entire agent
- Timeout now force-kills hung processes using kill -9, preventing orphaned smartctl processes

This resolves the issue where Data_3 showed unknown status because smartctl was hanging
indefinitely trying to read from a problematic drive, blocking the entire collector.
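
A condensed sketch of the timeout-and-kill pattern (piped stdio, added in v0.1.222 above, is left out here); the pid is captured before wait_with_output() consumes the child:

```rust
use std::process::Output;
use std::time::Duration;
use tokio::process::Command;
use tokio::time::timeout;

async fn run_with_timeout(mut cmd: Command, secs: u64) -> std::io::Result<Output> {
    let child = cmd.spawn()?;
    let pid = child.id();
    match timeout(Duration::from_secs(secs), child.wait_with_output()).await {
        Ok(result) => result,
        Err(_) => {
            // Force-kill the hung process so no orphaned smartctl is left behind.
            if let Some(pid) = pid {
                let _ = Command::new("kill").args(["-9", &pid.to_string()]).output().await;
            }
            Err(std::io::Error::new(
                std::io::ErrorKind::TimedOut,
                format!("command timed out after {secs}s"),
            ))
        }
    }
}
```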

Bump version to v0.1.220

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-29 21:09:04 +01:00
929870f8b6 Bump version to v0.1.219
All checks were successful
Build and Release / build-and-release (push) Successful in 1m11s
2025-11-29 18:35:14 +01:00
7aae852b7b Bump version to v0.1.218
All checks were successful
Build and Release / build-and-release (push) Successful in 1m19s
2025-11-29 17:59:33 +01:00
40f3ff66d8 Show archive count range to detect inconsistencies
- Display single number if all services have same count
- Display min-max range if counts differ (indicates problem)
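
A sketch of the display rule, assuming the per-service archive counts arrive as a plain slice:

```rust
// Collapse per-service archive counts into "N" or "min-max" for the Usage line.
fn archive_display(counts: &[i64]) -> String {
    let min = counts.iter().copied().min().unwrap_or(0);
    let max = counts.iter().copied().max().unwrap_or(0);
    if min == max {
        format!("{min}")
    } else {
        // A spread means at least one service is missing archives on this disk.
        format!("{min}-{max}")
    }
}
```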
2025-11-29 17:59:24 +01:00
1c1beddb55 Bump version to v0.1.217
All checks were successful
Build and Release / build-and-release (push) Successful in 1m20s
2025-11-29 17:51:13 +01:00
620d1f10b6 Show archive count per service instead of total sum 2025-11-29 17:51:01 +01:00
a0d571a40e Bump version to v0.1.216
All checks were successful
Build and Release / build-and-release (push) Successful in 1m19s
2025-11-29 17:44:12 +01:00
977200fff3 Move archive count to Usage line in backup display 2025-11-29 17:44:05 +01:00
d692de5f83 Bump version to v0.1.215
All checks were successful
Build and Release / build-and-release (push) Successful in 1m11s
2025-11-29 17:41:49 +01:00
f5913dbd43 Add archive count to backup disk display 2025-11-29 17:41:11 +01:00
faa30a7839 Sort backup repositories and disks for stable display
All checks were successful
Build and Release / build-and-release (push) Successful in 1m21s
- Sort repositories alphabetically before rendering
- Sort backup disks by serial number
- Prevents display jumping between different orderings on updates
- Consistent display order across refreshes
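
A minimal sketch of the ordering, with a trimmed-down BackupDiskData carrying only the serial used as the sort key:

```rust
// Trimmed-down stand-in; the real struct carries usage, status, etc.
struct BackupDiskData {
    serial: String,
}

fn stable_order(repositories: &mut Vec<String>, disks: &mut Vec<BackupDiskData>) {
    // Same order on every refresh: repositories alphabetically, disks by serial.
    repositories.sort();
    disks.sort_by(|a, b| a.serial.cmp(&b.serial));
}
```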

Bump version to v0.1.214

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-29 17:15:17 +01:00
6e4a42799f Bump version to v0.1.213
All checks were successful
Build and Release / build-and-release (push) Successful in 1m22s
2025-11-29 16:46:16 +01:00
afb8d68e03 Implement multi-disk backup support
- Update BackupData structure to support multiple backup disks
- Scan /var/lib/backup/status/ directory for all status files
- Calculate status icons for backup and disk usage
- Aggregate repository status from all disks
- Update dashboard to display all backup disks with per-disk status
- Display repository list with count and aggregated status
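
A sketch of the status-file scan, matching the backup-status-*.toml naming convention used in the diff below; a missing directory simply yields an empty list:

```rust
use std::fs;
use std::path::{Path, PathBuf};

// Collect every per-disk status file under e.g. /var/lib/backup/status/.
fn scan_status_files(dir: &str) -> Vec<PathBuf> {
    let mut files = Vec::new();
    let Ok(entries) = fs::read_dir(Path::new(dir)) else {
        return files;
    };
    for entry in entries.flatten() {
        let path = entry.path();
        let is_status_file = path
            .file_name()
            .and_then(|n| n.to_str())
            .map(|n| n.starts_with("backup-status-") && n.ends_with(".toml"))
            .unwrap_or(false);
        if path.is_file() && is_status_file {
            files.push(path);
        }
    }
    files
}
```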
2025-11-29 16:44:50 +01:00
5e08b34280 Move C-state name cleaning to agent for smaller JSON
All checks were successful
Build and Release / build-and-release (push) Successful in 1m32s
- Agent now extracts "C" + digits pattern (C3, C10) using char parsing
- Removes suffixes like "_ACPI", "_MWAIT" at source
- Reduces JSON payload size over ZMQ
- No regex dependency - uses fast char iteration (~1μs overhead)
- Robust fallback to original name if pattern not found
- Dashboard simplified to use clean names directly
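
The extraction rule on its own, as a small sketch: find 'C', take the digits that follow, fall back to the raw name if no digits are present:

```rust
// "C3_ACPI" -> "C3", "C10_MWAIT" -> "C10"; names without a C+digits core pass through.
fn clean_cstate_name(raw: &str) -> String {
    if let Some(c_pos) = raw.find('C') {
        let digits = raw[c_pos + 1..]
            .chars()
            .take_while(|c| c.is_ascii_digit())
            .count();
        if digits > 0 {
            return raw[c_pos..c_pos + 1 + digits].to_string();
        }
    }
    raw.to_string()
}
```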

Bump version to v0.1.212

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-29 14:05:55 +01:00
0d8284b69c Clean C-state display to show only CX format
All checks were successful
Build and Release / build-and-release (push) Successful in 1m18s
- Strip suffixes like "_ACPI" from C-state names
- Display changes from "C3_ACPI:51%" to "C3:51%"
- Cleaner, more concise presentation

Bump version to v0.1.211

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-29 13:34:01 +01:00
d84690cb3b Move transmission interval to ZMQ config section
All checks were successful
Build and Release / build-and-release (push) Successful in 1m43s
- Changed code to use zmq.transmission_interval_seconds instead of top-level collection_interval_seconds
- Removed collection_interval_seconds from AgentConfig
- Updated validation to check zmq.transmission_interval_seconds
- Improves config organization by grouping all ZMQ settings together
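
A sketch of the resulting config shape, assuming serde derives and omitting every field not named in the diff; the struct layout beyond these fields is an assumption:

```rust
use serde::Deserialize;

#[derive(Deserialize)]
pub struct ZmqConfig {
    // All ZMQ-related settings live together, including the transmission interval.
    pub transmission_interval_seconds: u64,
}

#[derive(Deserialize)]
pub struct AgentConfig {
    pub zmq: ZmqConfig,
    // collection_interval_seconds no longer exists at the top level.
}

fn validate(config: &AgentConfig) -> Result<(), String> {
    if config.zmq.transmission_interval_seconds == 0 {
        return Err("ZMQ transmission interval cannot be 0".into());
    }
    Ok(())
}
```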

Bump version to v0.1.210

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-29 13:31:39 +01:00
7c030b33d6 Show top 3 C-states with usage percentages
All checks were successful
Build and Release / build-and-release (push) Successful in 1m21s
- Changed CpuData.cstate from String to Vec<CStateInfo>
- Added CStateInfo struct with name and percent fields
- Collector calculates percentage for each C-state based on accumulated time
- Sorts and returns top 3 C-states by usage
- Dashboard displays: "C10:79% C8:10% C6:8%"

Provides better visibility into CPU idle state distribution.
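
A sketch of the ranking step, using the CStateInfo shape (name + percent) from the shared crate:

```rust
struct CStateInfo {
    name: String,
    percent: f32,
}

// Turn accumulated (name, time) pairs into the top 3 states by share of idle time.
fn top_cstates(mut times: Vec<(String, u64)>) -> Vec<CStateInfo> {
    let total: u64 = times.iter().map(|(_, t)| *t).sum();
    times.sort_by(|a, b| b.1.cmp(&a.1));
    times
        .into_iter()
        .take(3)
        .map(|(name, time)| CStateInfo {
            name,
            percent: if total > 0 {
                time as f32 / total as f32 * 100.0
            } else {
                0.0
            },
        })
        .collect()
}
```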

Bump version to v0.1.209

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-28 23:45:46 +01:00
c6817537a8 Replace CPU frequency with C-state monitoring
All checks were successful
Build and Release / build-and-release (push) Successful in 1m20s
- Changed CpuData.frequency_mhz to CpuData.cstate (String)
- Implemented collect_cstate() to read CPU idle depth from sysfs
- Finds deepest C-state with most accumulated time (C0-C10)
- Updated dashboard to display C-state instead of frequency
- More accurate indicator of CPU activity vs power management
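
A sketch of the sysfs walk (paths as shown in the diff further down), returning the non-POLL state with the most accumulated idle time on cpu0:

```rust
use std::fs;

// Walk cpu0's cpuidle states and return the one that accumulated the most time.
fn dominant_cstate() -> Option<String> {
    let mut best: Option<(String, u64)> = None;
    for state in 0..=10 {
        let base = format!("/sys/devices/system/cpu/cpu0/cpuidle/state{state}");
        let Ok(time) = fs::read_to_string(format!("{base}/time")) else { break };
        let Ok(name) = fs::read_to_string(format!("{base}/name")) else { continue };
        let time: u64 = time.trim().parse().unwrap_or(0);
        let name = name.trim().to_string();
        // POLL is busy-waiting, not a real idle state.
        if name == "POLL" {
            continue;
        }
        match &best {
            Some((_, t)) if *t >= time => {}
            _ => best = Some((name, time)),
        }
    }
    best.map(|(name, _)| name)
}
```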

Bump version to v0.1.208

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-28 23:30:14 +01:00
2189d34b16 Bump version to v0.1.207
All checks were successful
Build and Release / build-and-release (push) Successful in 1m9s
2025-11-28 23:16:33 +01:00
28cfd5758f Fix service metrics not showing - remove cache check
The service_status_cache from discovery only has active_state with
all detailed metrics set to None. During collection, get_service_status()
was returning cached data instead of fetching fresh systemctl show data.

Now always fetch fresh data to populate memory_bytes, restart_count,
and uptime_seconds properly.
2025-11-28 23:15:51 +01:00
5deb8cf8d8 Bump version to v0.1.206
All checks were successful
Build and Release / build-and-release (push) Successful in 1m10s
2025-11-28 23:07:20 +01:00
0e01813ff5 Add service metrics from systemctl (memory, uptime, restarts)
Shared:
- Add memory_bytes, restart_count, uptime_seconds to ServiceData

Agent:
- Add new fields to ServiceStatusInfo struct
- Fetch MemoryCurrent, NRestarts, ExecMainStartTimestamp from systemctl show
- Calculate uptime from start timestamp
- Parse and populate new fields in ServiceData
- Remove unused load_state and sub_state fields

Dashboard:
- Add memory_bytes, restart_count, uptime_seconds to ServiceInfo
- Update header: Service, Status, RAM, Uptime, ↻ (restarts)
- Format memory as MB/GB
- Format uptime as Xd Xh, Xh Xm, or Xm
- Show restart count with ! prefix if > 0 to indicate instability

All metrics are obtained from a single systemctl show call - no additional per-metric overhead.
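
A sketch of the single systemctl show call and its property parsing; only a subset of the properties is shown, and the date -d conversion used for ExecMainStartTimestamp is left out:

```rust
use std::process::Command;

#[derive(Default, Debug)]
struct ServiceStatusInfo {
    active_state: String,
    memory_bytes: Option<u64>,
    restart_count: Option<u32>,
}

// One `systemctl show` call yields the status plus all detailed metrics.
fn service_status(service: &str) -> std::io::Result<ServiceStatusInfo> {
    let unit = format!("{service}.service");
    let output = Command::new("timeout")
        .args([
            "2",
            "systemctl",
            "show",
            unit.as_str(),
            "--property=ActiveState,MemoryCurrent,NRestarts",
        ])
        .output()?;

    let mut info = ServiceStatusInfo::default();
    for line in String::from_utf8_lossy(&output.stdout).lines() {
        if let Some(v) = line.strip_prefix("ActiveState=") {
            info.active_state = v.to_string();
        } else if let Some(v) = line.strip_prefix("MemoryCurrent=") {
            // systemd reports "[not set]" when accounting is off; parse() just yields None.
            info.memory_bytes = v.parse().ok();
        } else if let Some(v) = line.strip_prefix("NRestarts=") {
            info.restart_count = v.parse().ok();
        }
    }
    Ok(info)
}
```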
2025-11-28 23:06:13 +01:00
c3c9507a42 Bump version to v0.1.205
All checks were successful
Build and Release / build-and-release (push) Successful in 1m22s
2025-11-28 22:37:28 +01:00
4d77ffe17e Remove RAM and Disk columns from services widget header
Changed header from 4 columns to 2 columns:
- Before: Service, Status, RAM, Disk
- After: Service, Status

Matches the removal of memory_mb and disk_gb fields.
2025-11-28 22:37:14 +01:00
16 changed files with 578 additions and 285 deletions

View File

@@ -327,9 +327,16 @@ Storage:
├─ ● Data_2: GGA04461 T: 28°C
└─ ● Parity: WDZS8RY0 T: 29°C
Backup:
● Repo: 4
├─ getea
├─ vaultwarden
├─ mysql
└─ immich
● W800639Y W: 2%
├─ ● Backup: 2025-11-29T04:00:01.324623
└─ ● Usage: 8% 70GB/916GB
● WD-WCC7K1234567 T: 32°C W: 12%
├─ Last: 2h ago (12.3GB)
├─ Next: in 22h
├─ ● Backup: 2025-11-29T04:00:01.324623
└─ ● Usage: 45% 678GB/1.5TB
```

Cargo.lock generated
View File

@@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
[[package]]
name = "cm-dashboard"
version = "0.1.203"
version = "0.1.224"
dependencies = [
"anyhow",
"chrono",
@@ -301,7 +301,7 @@ dependencies = [
[[package]]
name = "cm-dashboard-agent"
version = "0.1.203"
version = "0.1.224"
dependencies = [
"anyhow",
"async-trait",
@@ -309,6 +309,7 @@ dependencies = [
"chrono-tz",
"clap",
"cm-dashboard-shared",
"futures",
"gethostname",
"lettre",
"reqwest",
@@ -324,7 +325,7 @@ dependencies = [
[[package]]
name = "cm-dashboard-shared"
version = "0.1.203"
version = "0.1.224"
dependencies = [
"chrono",
"serde",
@@ -552,6 +553,21 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "futures"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.31"
@@ -559,6 +575,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
@@ -567,12 +584,34 @@ version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
[[package]]
name = "futures-executor"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-io"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
[[package]]
name = "futures-macro"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "futures-sink"
version = "0.3.31"
@@ -591,8 +630,11 @@ version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
"pin-project-lite",

View File

@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-agent"
version = "0.1.204"
version = "0.1.225"
edition = "2021"
[dependencies]
@@ -21,3 +21,4 @@ chrono-tz = "0.8"
toml = { workspace = true }
async-trait = "0.1"
reqwest = { version = "0.11", features = ["json", "blocking"] }
futures = "0.3"

View File

@@ -114,7 +114,7 @@ impl Agent {
// Set up intervals
let mut transmission_interval = interval(Duration::from_secs(
self.config.collection_interval_seconds,
self.config.zmq.transmission_interval_seconds,
));
let mut notification_interval = interval(Duration::from_secs(30)); // Check notifications every 30s

View File

@@ -1,36 +1,66 @@
use async_trait::async_trait;
use cm_dashboard_shared::{AgentData, BackupData, BackupDiskData};
use cm_dashboard_shared::{AgentData, BackupData, BackupDiskData, Status};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::Path;
use tracing::debug;
use std::path::{Path, PathBuf};
use tracing::{debug, warn};
use super::{Collector, CollectorError};
/// Backup collector that reads backup status from TOML files with structured data output
pub struct BackupCollector {
/// Path to backup status file
status_file_path: String,
/// Directory containing backup status files
status_dir: String,
}
impl BackupCollector {
pub fn new() -> Self {
Self {
status_file_path: "/var/lib/backup/backup-status.toml".to_string(),
status_dir: "/var/lib/backup/status".to_string(),
}
}
/// Read backup status from TOML file
async fn read_backup_status(&self) -> Result<Option<BackupStatusToml>, CollectorError> {
if !Path::new(&self.status_file_path).exists() {
debug!("Backup status file not found: {}", self.status_file_path);
return Ok(None);
/// Scan directory for all backup status files
async fn scan_status_files(&self) -> Result<Vec<PathBuf>, CollectorError> {
let status_path = Path::new(&self.status_dir);
if !status_path.exists() {
debug!("Backup status directory not found: {}", self.status_dir);
return Ok(Vec::new());
}
let content = fs::read_to_string(&self.status_file_path)
let mut status_files = Vec::new();
match fs::read_dir(status_path) {
Ok(entries) => {
for entry in entries {
if let Ok(entry) = entry {
let path = entry.path();
if path.is_file() {
if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
if filename.starts_with("backup-status-") && filename.ends_with(".toml") {
status_files.push(path);
}
}
}
}
}
}
Err(e) => {
warn!("Failed to read backup status directory: {}", e);
return Ok(Vec::new());
}
}
Ok(status_files)
}
/// Read a single backup status file
async fn read_status_file(&self, path: &Path) -> Result<BackupStatusToml, CollectorError> {
let content = fs::read_to_string(path)
.map_err(|e| CollectorError::SystemRead {
path: self.status_file_path.clone(),
path: path.to_string_lossy().to_string(),
error: e.to_string(),
})?;
@@ -40,65 +70,121 @@ impl BackupCollector {
error: format!("Failed to parse backup status TOML: {}", e),
})?;
Ok(Some(status))
Ok(status)
}
/// Calculate backup status from TOML status field
fn calculate_backup_status(status_str: &str) -> Status {
match status_str.to_lowercase().as_str() {
"success" => Status::Ok,
"warning" => Status::Warning,
"failed" | "error" => Status::Critical,
_ => Status::Unknown,
}
}
/// Calculate usage status from disk usage percentage
fn calculate_usage_status(usage_percent: f32) -> Status {
if usage_percent < 80.0 {
Status::Ok
} else if usage_percent < 90.0 {
Status::Warning
} else {
Status::Critical
}
}
/// Convert BackupStatusToml to BackupData and populate AgentData
async fn populate_backup_data(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
if let Some(backup_status) = self.read_backup_status().await? {
// Use raw start_time string from TOML
let status_files = self.scan_status_files().await?;
// Extract disk information
let repository_disk = if let Some(disk_space) = &backup_status.disk_space {
Some(BackupDiskData {
serial: backup_status.disk_serial_number.clone().unwrap_or_else(|| "Unknown".to_string()),
usage_percent: disk_space.usage_percent as f32,
used_gb: disk_space.used_gb as f32,
total_gb: disk_space.total_gb as f32,
if status_files.is_empty() {
debug!("No backup status files found");
agent_data.backup = BackupData {
repositories: Vec::new(),
repository_status: Status::Unknown,
disks: Vec::new(),
};
return Ok(());
}
let mut all_repositories = HashSet::new();
let mut disks = Vec::new();
let mut worst_status = Status::Ok;
for status_file in status_files {
match self.read_status_file(&status_file).await {
Ok(backup_status) => {
// Collect all service names
for service_name in backup_status.services.keys() {
all_repositories.insert(service_name.clone());
}
// Calculate backup status
let backup_status_enum = Self::calculate_backup_status(&backup_status.status);
// Calculate usage status from disk space
let (usage_percent, used_gb, total_gb, usage_status) = if let Some(disk_space) = &backup_status.disk_space {
let usage_pct = disk_space.usage_percent as f32;
(
usage_pct,
disk_space.used_gb as f32,
disk_space.total_gb as f32,
Self::calculate_usage_status(usage_pct),
)
} else {
(0.0, 0.0, 0.0, Status::Unknown)
};
// Update worst status
worst_status = worst_status.max(backup_status_enum).max(usage_status);
// Build service list for this disk
let services: Vec<String> = backup_status.services.keys().cloned().collect();
// Get min and max archive counts to detect inconsistencies
let archives_min: i64 = backup_status.services.values()
.map(|service| service.archive_count)
.min()
.unwrap_or(0);
let archives_max: i64 = backup_status.services.values()
.map(|service| service.archive_count)
.max()
.unwrap_or(0);
// Create disk data
let disk_data = BackupDiskData {
serial: backup_status.disk_serial_number.unwrap_or_else(|| "Unknown".to_string()),
product_name: backup_status.disk_product_name,
wear_percent: backup_status.disk_wear_percent,
temperature_celsius: None, // Not available in current TOML
})
} else if let Some(serial) = &backup_status.disk_serial_number {
// Fallback: create minimal disk info if we have serial but no disk_space
Some(BackupDiskData {
serial: serial.clone(),
usage_percent: 0.0,
used_gb: 0.0,
total_gb: 0.0,
wear_percent: backup_status.disk_wear_percent,
temperature_celsius: None,
})
} else {
None
last_backup_time: Some(backup_status.start_time),
backup_status: backup_status_enum,
disk_usage_percent: usage_percent,
disk_used_gb: used_gb,
disk_total_gb: total_gb,
usage_status,
services,
archives_min,
archives_max,
};
// Calculate total repository size from services
let total_size_gb = backup_status.services
.values()
.map(|service| service.repo_size_bytes as f32 / (1024.0 * 1024.0 * 1024.0))
.sum::<f32>();
let backup_data = BackupData {
status: backup_status.status,
total_size_gb: Some(total_size_gb),
repository_health: Some("ok".to_string()), // Derive from status if needed
repository_disk,
last_backup_size_gb: None, // Not available in current TOML format
start_time_raw: Some(backup_status.start_time),
};
agent_data.backup = backup_data;
} else {
// No backup status available - set default values
agent_data.backup = BackupData {
status: "unavailable".to_string(),
total_size_gb: None,
repository_health: None,
repository_disk: None,
last_backup_size_gb: None,
start_time_raw: None,
};
disks.push(disk_data);
}
Err(e) => {
warn!("Failed to read backup status file {:?}: {}", status_file, e);
}
}
}
let repositories: Vec<String> = all_repositories.into_iter().collect();
agent_data.backup = BackupData {
repositories,
repository_status: worst_status,
disks,
};
Ok(())
}

View File

@@ -119,36 +119,69 @@ impl CpuCollector {
utils::parse_u64(content.trim())
}
/// Collect CPU frequency and populate AgentData
async fn collect_frequency(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
// Try scaling frequency first (more accurate for current frequency)
if let Ok(freq) =
utils::read_proc_file("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq")
{
if let Ok(freq_khz) = utils::parse_u64(freq.trim()) {
let freq_mhz = freq_khz as f32 / 1000.0;
agent_data.system.cpu.frequency_mhz = freq_mhz;
return Ok(());
/// Collect CPU C-state (idle depth) and populate AgentData with top 3 C-states by usage
async fn collect_cstate(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
// Read C-state usage from first CPU (representative of overall system)
// C-states indicate CPU idle depth: C1=light sleep, C6=deep sleep, C10=deepest
let mut cstate_times: Vec<(String, u64)> = Vec::new();
let mut total_time: u64 = 0;
// Collect all C-state times from CPU0
for state_num in 0..=10 {
let time_path = format!("/sys/devices/system/cpu/cpu0/cpuidle/state{}/time", state_num);
let name_path = format!("/sys/devices/system/cpu/cpu0/cpuidle/state{}/name", state_num);
if let Ok(time_str) = utils::read_proc_file(&time_path) {
if let Ok(time) = utils::parse_u64(time_str.trim()) {
if let Ok(name) = utils::read_proc_file(&name_path) {
let state_name = name.trim();
// Skip POLL state (not real idle)
if state_name != "POLL" && time > 0 {
// Extract "C" + digits pattern (C3, C10, etc.) to reduce JSON size
// Handles formats like "C3_ACPI", "C10_MWAIT", etc.
let clean_name = if let Some(c_pos) = state_name.find('C') {
let rest = &state_name[c_pos + 1..];
let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count();
if digit_count > 0 {
state_name[c_pos..c_pos + 1 + digit_count].to_string()
} else {
state_name.to_string()
}
} else {
state_name.to_string()
};
cstate_times.push((clean_name, time));
total_time += time;
}
}
}
} else {
// No more states available
break;
}
}
// Fallback: parse /proc/cpuinfo for base frequency
if let Ok(content) = utils::read_proc_file("/proc/cpuinfo") {
for line in content.lines() {
if line.starts_with("cpu MHz") {
if let Some(freq_str) = line.split(':').nth(1) {
if let Ok(freq_mhz) = utils::parse_f32(freq_str) {
agent_data.system.cpu.frequency_mhz = freq_mhz;
return Ok(());
}
}
break; // Only need first CPU entry
}
}
}
// Sort by time descending to get top 3
cstate_times.sort_by(|a, b| b.1.cmp(&a.1));
// Calculate percentages for top 3 and populate AgentData
agent_data.system.cpu.cstates = cstate_times
.iter()
.take(3)
.map(|(name, time)| {
let percent = if total_time > 0 {
(*time as f32 / total_time as f32) * 100.0
} else {
0.0
};
cm_dashboard_shared::CStateInfo {
name: name.clone(),
percent,
}
})
.collect();
debug!("CPU frequency not available");
// Leave frequency as 0.0 if not available
Ok(())
}
}
@@ -165,8 +198,8 @@ impl Collector for CpuCollector {
// Collect temperature (optional)
self.collect_temperature(agent_data).await?;
// Collect frequency (optional)
self.collect_frequency(agent_data).await?;
// Collect C-state (CPU idle depth)
self.collect_cstate(agent_data).await?;
let duration = start.elapsed();
debug!("CPU collection completed in {:?}", duration);

View File

@@ -3,7 +3,8 @@ use async_trait::async_trait;
use cm_dashboard_shared::{AgentData, DriveData, FilesystemData, PoolData, HysteresisThresholds, Status};
use crate::config::DiskConfig;
use std::process::Command;
use tokio::process::Command as TokioCommand;
use std::process::Command as StdCommand;
use std::time::Instant;
use std::collections::HashMap;
use tracing::debug;
@@ -114,7 +115,7 @@ impl DiskCollector {
async fn get_mount_devices(&self) -> Result<HashMap<String, String>, CollectorError> {
use super::run_command_with_timeout;
let mut cmd = Command::new("lsblk");
let mut cmd = TokioCommand::new("lsblk");
cmd.args(&["-rn", "-o", "NAME,MOUNTPOINT"]);
let output = run_command_with_timeout(cmd, 2).await
@@ -189,7 +190,7 @@ impl DiskCollector {
/// Get filesystem info for a single mount point
fn get_filesystem_info(&self, mount_point: &str) -> Result<(u64, u64), CollectorError> {
let output = std::process::Command::new("timeout")
let output = StdCommand::new("timeout")
.args(&["2", "df", "--block-size=1", mount_point])
.output()
.map_err(|e| CollectorError::SystemRead {
@@ -386,9 +387,9 @@ impl DiskCollector {
device.to_string()
}
/// Get SMART data for drives
/// Get SMART data for drives in parallel
async fn get_smart_data_for_drives(&self, physical_drives: &[PhysicalDrive], mergerfs_pools: &[MergerfsPool]) -> HashMap<String, SmartData> {
let mut smart_data = HashMap::new();
use futures::future::join_all;
// Collect all drive names
let mut all_drives = std::collections::HashSet::new();
@@ -404,9 +405,24 @@ impl DiskCollector {
}
}
// Get SMART data for each drive
for drive_name in all_drives {
if let Ok(data) = self.get_smart_data(&drive_name).await {
// Collect SMART data for all drives in parallel
let futures: Vec<_> = all_drives
.iter()
.map(|drive_name| {
let drive = drive_name.clone();
async move {
let result = self.get_smart_data(&drive).await;
(drive, result)
}
})
.collect();
let results = join_all(futures).await;
// Build HashMap from results
let mut smart_data = HashMap::new();
for (drive_name, result) in results {
if let Ok(data) = result {
smart_data.insert(drive_name, data);
}
}
@@ -420,7 +436,7 @@ impl DiskCollector {
// Use direct smartctl (no sudo) - service has CAP_SYS_RAWIO and CAP_SYS_ADMIN capabilities
// For NVMe drives, specify device type explicitly
let mut cmd = Command::new("smartctl");
let mut cmd = TokioCommand::new("smartctl");
if drive_name.starts_with("nvme") {
cmd.args(&["-d", "nvme", "-a", &format!("/dev/{}", drive_name)]);
} else {
@@ -435,8 +451,10 @@ impl DiskCollector {
let output_str = String::from_utf8_lossy(&output.stdout);
if !output.status.success() {
// Return unknown data rather than failing completely
// Note: smartctl returns non-zero exit codes for warnings (like exit code 32
// for "temperature was high in the past"), but the output data is still valid.
// Only check if we got any output at all, don't reject based on exit code.
if output_str.is_empty() {
return Ok(SmartData {
health: "UNKNOWN".to_string(),
serial_number: None,
@@ -763,7 +781,7 @@ impl DiskCollector {
/// Get drive information for a mount path
fn get_drive_info_for_path(&self, path: &str) -> anyhow::Result<PoolDrive> {
// Use lsblk to find the backing device with timeout
let output = Command::new("timeout")
let output = StdCommand::new("timeout")
.args(&["2", "lsblk", "-rn", "-o", "NAME,MOUNTPOINT"])
.output()
.map_err(|e| anyhow::anyhow!("Failed to run lsblk: {}", e))?;
@@ -786,19 +804,12 @@ impl DiskCollector {
// Extract base device name (e.g., "sda1" -> "sda")
let base_device = self.extract_base_device(&format!("/dev/{}", device));
// Get temperature from SMART data if available
let temperature = if let Ok(smart_data) = tokio::task::block_in_place(|| {
tokio::runtime::Handle::current().block_on(self.get_smart_data(&base_device))
}) {
smart_data.temperature_celsius
} else {
None
};
// Temperature will be filled in later from parallel SMART collection
// Don't collect it here to avoid sequential blocking with problematic async nesting
Ok(PoolDrive {
name: base_device,
mount_point: path.to_string(),
temperature_celsius: temperature,
temperature_celsius: None,
})
}

View File

@@ -1,8 +1,7 @@
use async_trait::async_trait;
use cm_dashboard_shared::{AgentData};
use std::process::{Command, Output};
use std::process::Output;
use std::time::Duration;
use tokio::time::timeout;
pub mod backup;
pub mod cpu;
@@ -16,16 +15,34 @@ pub mod systemd;
pub use error::CollectorError;
/// Run a command with a timeout to prevent blocking
pub async fn run_command_with_timeout(mut cmd: Command, timeout_secs: u64) -> std::io::Result<Output> {
/// Properly kills the process if timeout is exceeded
pub async fn run_command_with_timeout(mut cmd: tokio::process::Command, timeout_secs: u64) -> std::io::Result<Output> {
use tokio::time::timeout;
use std::process::Stdio;
let timeout_duration = Duration::from_secs(timeout_secs);
match timeout(timeout_duration, tokio::task::spawn_blocking(move || cmd.output())).await {
Ok(Ok(result)) => result,
Ok(Err(e)) => Err(std::io::Error::new(std::io::ErrorKind::Other, e)),
Err(_) => Err(std::io::Error::new(
// Configure stdio to capture output
cmd.stdout(Stdio::piped());
cmd.stderr(Stdio::piped());
let child = cmd.spawn()?;
let pid = child.id();
match timeout(timeout_duration, child.wait_with_output()).await {
Ok(result) => result,
Err(_) => {
// Timeout - force kill the process using system kill command
if let Some(process_id) = pid {
let _ = tokio::process::Command::new("kill")
.args(&["-9", &process_id.to_string()])
.output()
.await;
}
Err(std::io::Error::new(
std::io::ErrorKind::TimedOut,
format!("Command timed out after {} seconds", timeout_secs)
)),
))
}
}
}

View File

@@ -43,9 +43,10 @@ struct ServiceCacheState {
/// Cached service status information from systemctl list-units
#[derive(Debug, Clone)]
struct ServiceStatusInfo {
load_state: String,
active_state: String,
sub_state: String,
memory_bytes: Option<u64>,
restart_count: Option<u32>,
start_timestamp: Option<u64>,
}
impl SystemdCollector {
@@ -86,11 +87,20 @@ impl SystemdCollector {
let mut complete_service_data = Vec::new();
for service_name in &monitored_services {
match self.get_service_status(service_name) {
Ok((active_status, _detailed_info)) => {
Ok(status_info) => {
let mut sub_services = Vec::new();
// Calculate uptime if we have start timestamp
let uptime_seconds = status_info.start_timestamp.and_then(|start| {
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.ok()?
.as_secs();
Some(now.saturating_sub(start))
});
// Sub-service metrics for specific services (always include cached results)
if service_name.contains("nginx") && active_status == "active" {
if service_name.contains("nginx") && status_info.active_state == "active" {
let nginx_sites = self.get_nginx_site_metrics();
for (site_name, latency_ms) in nginx_sites {
let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms {
@@ -115,7 +125,7 @@ impl SystemdCollector {
}
}
if service_name.contains("docker") && active_status == "active" {
if service_name.contains("docker") && status_info.active_state == "active" {
let docker_containers = self.get_docker_containers();
for (container_name, container_status) in docker_containers {
// For now, docker containers have no additional metrics
@@ -153,8 +163,11 @@ impl SystemdCollector {
let service_data = ServiceData {
name: service_name.clone(),
user_stopped: false, // TODO: Integrate with service tracker
service_status: self.calculate_service_status(service_name, &active_status),
service_status: self.calculate_service_status(service_name, &status_info.active_state),
sub_services,
memory_bytes: status_info.memory_bytes,
restart_count: status_info.restart_count,
uptime_seconds,
};
// Add to AgentData and cache
@@ -290,14 +303,13 @@ impl SystemdCollector {
let fields: Vec<&str> = line.split_whitespace().collect();
if fields.len() >= 4 && fields[0].ends_with(".service") {
let service_name = fields[0].trim_end_matches(".service");
let load_state = fields.get(1).unwrap_or(&"unknown").to_string();
let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
let sub_state = fields.get(3).unwrap_or(&"unknown").to_string();
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
load_state,
active_state,
sub_state,
memory_bytes: None,
restart_count: None,
start_timestamp: None,
});
}
}
@@ -306,9 +318,10 @@ impl SystemdCollector {
for service_name in &all_service_names {
if !status_cache.contains_key(service_name) {
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
load_state: "not-loaded".to_string(),
active_state: "inactive".to_string(),
sub_state: "dead".to_string(),
memory_bytes: None,
restart_count: None,
start_timestamp: None,
});
}
}
@@ -340,36 +353,60 @@ impl SystemdCollector {
Ok((services, status_cache))
}
/// Get service status from cache (if available) or fallback to systemctl
fn get_service_status(&self, service: &str) -> Result<(String, String)> {
// Try to get status from cache first
if let Ok(state) = self.state.read() {
if let Some(cached_info) = state.service_status_cache.get(service) {
let active_status = cached_info.active_state.clone();
let detailed_info = format!(
"LoadState={}\nActiveState={}\nSubState={}",
cached_info.load_state,
cached_info.active_state,
cached_info.sub_state
);
return Ok((active_status, detailed_info));
/// Get service status with detailed metrics from systemctl
fn get_service_status(&self, service: &str) -> Result<ServiceStatusInfo> {
// Always fetch fresh data to get detailed metrics (memory, restarts, uptime)
// Note: Cache in service_status_cache only has basic active_state from discovery,
// with all detailed metrics set to None. We need fresh systemctl show data.
let output = Command::new("timeout")
.args(&[
"2",
"systemctl",
"show",
&format!("{}.service", service),
"--property=LoadState,ActiveState,SubState,MemoryCurrent,NRestarts,ExecMainStartTimestamp"
])
.output()?;
let output_str = String::from_utf8(output.stdout)?;
// Parse properties
let mut active_state = String::new();
let mut memory_bytes = None;
let mut restart_count = None;
let mut start_timestamp = None;
for line in output_str.lines() {
if let Some(value) = line.strip_prefix("ActiveState=") {
active_state = value.to_string();
} else if let Some(value) = line.strip_prefix("MemoryCurrent=") {
if value != "[not set]" {
memory_bytes = value.parse().ok();
}
} else if let Some(value) = line.strip_prefix("NRestarts=") {
restart_count = value.parse().ok();
} else if let Some(value) = line.strip_prefix("ExecMainStartTimestamp=") {
if value != "[not set]" && !value.is_empty() {
// Parse timestamp to seconds since epoch
if let Ok(output) = Command::new("date")
.args(&["+%s", "-d", value])
.output()
{
if let Ok(timestamp_str) = String::from_utf8(output.stdout) {
start_timestamp = timestamp_str.trim().parse().ok();
}
}
}
}
}
// Fallback to systemctl if not in cache (with 2 second timeout)
let output = Command::new("timeout")
.args(&["2", "systemctl", "is-active", &format!("{}.service", service)])
.output()?;
let active_status = String::from_utf8(output.stdout)?.trim().to_string();
// Get more detailed info (with 2 second timeout)
let output = Command::new("timeout")
.args(&["2", "systemctl", "show", &format!("{}.service", service), "--property=LoadState,ActiveState,SubState"])
.output()?;
let detailed_info = String::from_utf8(output.stdout)?;
Ok((active_status, detailed_info))
Ok(ServiceStatusInfo {
active_state,
memory_bytes,
restart_count,
start_timestamp,
})
}
/// Check if service name matches pattern (supports wildcards like nginx*)

View File

@@ -13,7 +13,6 @@ pub struct AgentConfig {
pub collectors: CollectorConfig,
pub cache: CacheConfig,
pub notifications: NotificationConfig,
pub collection_interval_seconds: u64,
}
/// ZMQ communication configuration

View File

@@ -11,9 +11,9 @@ pub fn validate_config(config: &AgentConfig) -> Result<()> {
bail!("ZMQ bind address cannot be empty");
}
// Validate collection interval
if config.collection_interval_seconds == 0 {
bail!("Collection interval cannot be 0");
// Validate ZMQ transmission interval
if config.zmq.transmission_interval_seconds == 0 {
bail!("ZMQ transmission interval cannot be 0");
}
// Validate CPU thresholds

View File

@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard"
version = "0.1.204"
version = "0.1.225"
edition = "2021"
[dependencies]

View File

@@ -31,6 +31,9 @@ struct ServiceInfo {
metrics: Vec<(String, f32, Option<String>)>, // (label, value, unit)
widget_status: Status,
service_type: String, // "nginx_site", "container", "image", or empty for parent services
memory_bytes: Option<u64>,
restart_count: Option<u32>,
uptime_seconds: Option<u64>,
}
impl ServicesWidget {
@@ -92,9 +95,43 @@ impl ServicesWidget {
Status::Offline => "offline",
};
// Format memory
let memory_str = info.memory_bytes.map_or("-".to_string(), |bytes| {
let mb = bytes as f64 / (1024.0 * 1024.0);
if mb >= 1000.0 {
format!("{:.1}G", mb / 1024.0)
} else {
format!("{:.0}M", mb)
}
});
// Format uptime
let uptime_str = info.uptime_seconds.map_or("-".to_string(), |secs| {
let days = secs / 86400;
let hours = (secs % 86400) / 3600;
let mins = (secs % 3600) / 60;
if days > 0 {
format!("{}d{}h", days, hours)
} else if hours > 0 {
format!("{}h{}m", hours, mins)
} else {
format!("{}m", mins)
}
});
// Format restarts (show "!" if > 0 to indicate instability)
let restart_str = info.restart_count.map_or("-".to_string(), |count| {
if count > 0 {
format!("!{}", count)
} else {
"0".to_string()
}
});
format!(
"{:<23} {:<10}",
short_name, status_str
"{:<23} {:<10} {:<8} {:<8} {:<5}",
short_name, status_str, memory_str, uptime_str, restart_str
)
}
@@ -280,6 +317,9 @@ impl Widget for ServicesWidget {
metrics: Vec::new(), // Parent services don't have custom metrics
widget_status: service.service_status,
service_type: String::new(), // Parent services have no type
memory_bytes: service.memory_bytes,
restart_count: service.restart_count,
uptime_seconds: service.uptime_seconds,
};
self.parent_services.insert(service.name.clone(), parent_info);
@@ -296,6 +336,9 @@ impl Widget for ServicesWidget {
metrics,
widget_status: sub_service.service_status,
service_type: sub_service.service_type.clone(),
memory_bytes: None, // Sub-services don't have individual metrics yet
restart_count: None,
uptime_seconds: None,
};
sub_list.push((sub_service.name.clone(), sub_info));
}
@@ -338,6 +381,9 @@ impl ServicesWidget {
metrics: Vec::new(),
widget_status: Status::Unknown,
service_type: String::new(),
memory_bytes: None,
restart_count: None,
uptime_seconds: None,
});
if metric.name.ends_with("_status") {
@@ -364,6 +410,9 @@ impl ServicesWidget {
metrics: Vec::new(),
widget_status: Status::Unknown,
service_type: String::new(), // Unknown type in legacy path
memory_bytes: None,
restart_count: None,
uptime_seconds: None,
},
));
&mut sub_service_list.last_mut().unwrap().1
@@ -429,8 +478,8 @@ impl ServicesWidget {
// Header
let header = format!(
"{:<25} {:<10} {:<8} {:<8}",
"Service:", "Status:", "RAM:", "Disk:"
"{:<25} {:<10} {:<8} {:<8} {:<5}",
"Service:", "Status:", "RAM:", "Uptime:", ":"
);
let header_para = Paragraph::new(header).style(Typography::muted());
frame.render_widget(header_para, content_chunks[0]);

View File

@@ -26,7 +26,7 @@ pub struct SystemWidget {
cpu_load_1min: Option<f32>,
cpu_load_5min: Option<f32>,
cpu_load_15min: Option<f32>,
cpu_frequency: Option<f32>,
cpu_cstates: Vec<cm_dashboard_shared::CStateInfo>,
cpu_status: Status,
// Memory metrics
@@ -45,15 +45,9 @@ pub struct SystemWidget {
storage_pools: Vec<StoragePool>,
// Backup metrics
backup_status: String,
backup_start_time_raw: Option<String>,
backup_disk_serial: Option<String>,
backup_disk_usage_percent: Option<f32>,
backup_disk_used_gb: Option<f32>,
backup_disk_total_gb: Option<f32>,
backup_disk_wear_percent: Option<f32>,
backup_disk_temperature: Option<f32>,
backup_last_size_gb: Option<f32>,
backup_repositories: Vec<String>,
backup_repository_status: Status,
backup_disks: Vec<cm_dashboard_shared::BackupDiskData>,
// Overall status
has_data: bool,
@@ -102,7 +96,7 @@ impl SystemWidget {
cpu_load_1min: None,
cpu_load_5min: None,
cpu_load_15min: None,
cpu_frequency: None,
cpu_cstates: Vec::new(),
cpu_status: Status::Unknown,
memory_usage_percent: None,
memory_used_gb: None,
@@ -114,15 +108,9 @@ impl SystemWidget {
tmp_status: Status::Unknown,
tmpfs_mounts: Vec::new(),
storage_pools: Vec::new(),
backup_status: "unknown".to_string(),
backup_start_time_raw: None,
backup_disk_serial: None,
backup_disk_usage_percent: None,
backup_disk_used_gb: None,
backup_disk_total_gb: None,
backup_disk_wear_percent: None,
backup_disk_temperature: None,
backup_last_size_gb: None,
backup_repositories: Vec::new(),
backup_repository_status: Status::Unknown,
backup_disks: Vec::new(),
has_data: false,
}
}
@@ -137,12 +125,19 @@ impl SystemWidget {
}
}
/// Format CPU frequency
fn format_cpu_frequency(&self) -> String {
match self.cpu_frequency {
Some(freq) => format!("{:.0} MHz", freq),
None => "— MHz".to_string(),
/// Format CPU C-states (idle depth) with percentages
fn format_cpu_cstate(&self) -> String {
if self.cpu_cstates.is_empty() {
return "".to_string();
}
// Format top 3 C-states with percentages: "C10:79% C8:10% C6:8%"
// Agent already sends clean names (C3, C10, etc.)
self.cpu_cstates
.iter()
.map(|cs| format!("{}:{:.0}%", cs.name, cs.percent))
.collect::<Vec<_>>()
.join(" ")
}
/// Format memory usage
@@ -188,7 +183,7 @@ impl Widget for SystemWidget {
self.cpu_load_1min = Some(cpu.load_1min);
self.cpu_load_5min = Some(cpu.load_5min);
self.cpu_load_15min = Some(cpu.load_15min);
self.cpu_frequency = Some(cpu.frequency_mhz);
self.cpu_cstates = cpu.cstates.clone();
self.cpu_status = Status::Ok;
// Extract memory data directly
@@ -214,25 +209,9 @@ impl Widget for SystemWidget {
// Extract backup data
let backup = &agent_data.backup;
self.backup_status = backup.status.clone();
self.backup_start_time_raw = backup.start_time_raw.clone();
self.backup_last_size_gb = backup.last_backup_size_gb;
if let Some(disk) = &backup.repository_disk {
self.backup_disk_serial = Some(disk.serial.clone());
self.backup_disk_usage_percent = Some(disk.usage_percent);
self.backup_disk_used_gb = Some(disk.used_gb);
self.backup_disk_total_gb = Some(disk.total_gb);
self.backup_disk_wear_percent = disk.wear_percent;
self.backup_disk_temperature = disk.temperature_celsius;
} else {
self.backup_disk_serial = None;
self.backup_disk_usage_percent = None;
self.backup_disk_used_gb = None;
self.backup_disk_total_gb = None;
self.backup_disk_wear_percent = None;
self.backup_disk_temperature = None;
}
self.backup_repositories = backup.repositories.clone();
self.backup_repository_status = backup.repository_status;
self.backup_disks = backup.disks.clone();
}
}
@@ -532,14 +511,36 @@ impl SystemWidget {
fn render_backup(&self) -> Vec<Line<'_>> {
let mut lines = Vec::new();
// First line: serial number with temperature and wear
if let Some(serial) = &self.backup_disk_serial {
let truncated_serial = truncate_serial(serial);
// First section: Repository status and list
if !self.backup_repositories.is_empty() {
let repo_text = format!("Repo: {}", self.backup_repositories.len());
let repo_spans = StatusIcons::create_status_spans(self.backup_repository_status, &repo_text);
lines.push(Line::from(repo_spans));
// List all repositories (sorted for consistent display)
let mut sorted_repos = self.backup_repositories.clone();
sorted_repos.sort();
let repo_count = sorted_repos.len();
for (idx, repo) in sorted_repos.iter().enumerate() {
let tree_char = if idx == repo_count - 1 { "└─" } else { "├─" };
lines.push(Line::from(vec![
Span::styled(format!(" {} ", tree_char), Typography::tree()),
Span::styled(repo.clone(), Typography::secondary()),
]));
}
}
// Second section: Per-disk backup information (sorted by serial for consistent display)
let mut sorted_disks = self.backup_disks.clone();
sorted_disks.sort_by(|a, b| a.serial.cmp(&b.serial));
for disk in &sorted_disks {
let truncated_serial = truncate_serial(&disk.serial);
let mut details = Vec::new();
if let Some(temp) = self.backup_disk_temperature {
if let Some(temp) = disk.temperature_celsius {
details.push(format!("T: {}°C", temp as i32));
}
if let Some(wear) = self.backup_disk_wear_percent {
if let Some(wear) = disk.wear_percent {
details.push(format!("W: {}%", wear as i32));
}
@@ -549,44 +550,40 @@ impl SystemWidget {
truncated_serial
};
let backup_status = match self.backup_status.as_str() {
"completed" | "success" => Status::Ok,
"running" => Status::Pending,
"failed" => Status::Critical,
_ => Status::Unknown,
};
let disk_spans = StatusIcons::create_status_spans(backup_status, &disk_text);
// Overall disk status (worst of backup and usage)
let disk_status = disk.backup_status.max(disk.usage_status);
let disk_spans = StatusIcons::create_status_spans(disk_status, &disk_text);
lines.push(Line::from(disk_spans));
// Show backup time from TOML if available
if let Some(start_time) = &self.backup_start_time_raw {
let time_text = if let Some(size) = self.backup_last_size_gb {
format!("Time: {} ({:.1}GB)", start_time, size)
// Show backup time with status
if let Some(backup_time) = &disk.last_backup_time {
let time_text = format!("Backup: {}", backup_time);
let mut time_spans = vec![
Span::styled(" ├─ ", Typography::tree()),
];
time_spans.extend(StatusIcons::create_status_spans(disk.backup_status, &time_text));
lines.push(Line::from(time_spans));
}
// Show usage with status and archive count
let archive_display = if disk.archives_min == disk.archives_max {
format!("{}", disk.archives_min)
} else {
format!("Time: {}", start_time)
format!("{}-{}", disk.archives_min, disk.archives_max)
};
lines.push(Line::from(vec![
Span::styled(" ├─ ", Typography::tree()),
Span::styled(time_text, Typography::secondary())
]));
}
// Usage information
if let (Some(used), Some(total), Some(usage_percent)) = (
self.backup_disk_used_gb,
self.backup_disk_total_gb,
self.backup_disk_usage_percent
) {
let usage_text = format!("Usage: {:.0}% {:.0}GB/{:.0}GB", usage_percent, used, total);
let usage_spans = StatusIcons::create_status_spans(Status::Ok, &usage_text);
let mut full_spans = vec![
let usage_text = format!(
"Usage: ({}) {:.0}% {:.0}GB/{:.0}GB",
archive_display,
disk.disk_usage_percent,
disk.disk_used_gb,
disk.disk_total_gb
);
let mut usage_spans = vec![
Span::styled(" └─ ", Typography::tree()),
];
full_spans.extend(usage_spans);
lines.push(Line::from(full_spans));
}
usage_spans.extend(StatusIcons::create_status_spans(disk.usage_status, &usage_text));
lines.push(Line::from(usage_spans));
}
lines
@@ -832,10 +829,10 @@ impl SystemWidget {
);
lines.push(Line::from(cpu_spans));
let freq_text = self.format_cpu_frequency();
let cstate_text = self.format_cpu_cstate();
lines.push(Line::from(vec![
Span::styled(" └─ ", Typography::tree()),
Span::styled(format!("Freq: {}", freq_text), Typography::secondary())
Span::styled(format!("C-state: {}", cstate_text), Typography::secondary())
]));
// RAM section
@@ -894,7 +891,7 @@ impl SystemWidget {
lines.extend(storage_lines);
// Backup section (if available)
if self.backup_status != "unavailable" && self.backup_status != "unknown" {
if !self.backup_repositories.is_empty() || !self.backup_disks.is_empty() {
lines.push(Line::from(vec![
Span::styled("Backup:", Typography::widget_title())
]));

View File

@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-shared"
version = "0.1.204"
version = "0.1.225"
edition = "2021"
[dependencies]

View File

@@ -40,13 +40,20 @@ pub struct NetworkInterfaceData {
pub vlan_id: Option<u16>,
}
/// CPU C-state usage information
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CStateInfo {
pub name: String,
pub percent: f32,
}
/// CPU monitoring data
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CpuData {
pub load_1min: f32,
pub load_5min: f32,
pub load_15min: f32,
pub frequency_mhz: f32,
pub cstates: Vec<CStateInfo>, // C-state usage percentages (C1, C6, C10, etc.) - indicates CPU idle depth distribution
pub temperature_celsius: Option<f32>,
pub load_status: Status,
pub temperature_status: Status,
@@ -139,6 +146,12 @@ pub struct ServiceData {
pub user_stopped: bool,
pub service_status: Status,
pub sub_services: Vec<SubServiceData>,
/// Memory usage in bytes (from MemoryCurrent)
pub memory_bytes: Option<u64>,
/// Number of service restarts (from NRestarts)
pub restart_count: Option<u32>,
/// Uptime in seconds (calculated from ExecMainStartTimestamp)
pub uptime_seconds: Option<u64>,
}
/// Sub-service data (nginx sites, docker containers, etc.)
@@ -163,23 +176,27 @@ pub struct SubServiceMetric {
/// Backup system data
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BackupData {
pub status: String,
pub total_size_gb: Option<f32>,
pub repository_health: Option<String>,
pub repository_disk: Option<BackupDiskData>,
pub last_backup_size_gb: Option<f32>,
pub start_time_raw: Option<String>,
pub repositories: Vec<String>,
pub repository_status: Status,
pub disks: Vec<BackupDiskData>,
}
/// Backup repository disk information
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BackupDiskData {
pub serial: String,
pub usage_percent: f32,
pub used_gb: f32,
pub total_gb: f32,
pub product_name: Option<String>,
pub wear_percent: Option<f32>,
pub temperature_celsius: Option<f32>,
pub last_backup_time: Option<String>,
pub backup_status: Status,
pub disk_usage_percent: f32,
pub disk_used_gb: f32,
pub disk_total_gb: f32,
pub usage_status: Status,
pub services: Vec<String>,
pub archives_min: i64,
pub archives_max: i64,
}
impl AgentData {
@@ -198,7 +215,7 @@ impl AgentData {
load_1min: 0.0,
load_5min: 0.0,
load_15min: 0.0,
frequency_mhz: 0.0,
cstates: Vec::new(),
temperature_celsius: None,
load_status: Status::Unknown,
temperature_status: Status::Unknown,
@@ -220,12 +237,9 @@ impl AgentData {
},
services: Vec::new(),
backup: BackupData {
status: "unknown".to_string(),
total_size_gb: None,
repository_health: None,
repository_disk: None,
last_backup_size_gb: None,
start_time_raw: None,
repositories: Vec::new(),
repository_status: Status::Unknown,
disks: Vec::new(),
},
}
}