From 33b3beb342378f4f243509414115362510ee1d0d Mon Sep 17 00:00:00 2001
From: Christoffer Martinsson
Date: Sun, 23 Nov 2025 11:44:57 +0100
Subject: [PATCH] Implement storage auto-discovery system

- Add automatic detection of mergerfs pools by parsing /proc/mounts
- Implement smart heuristics for parity disk identification
- Store discovered topology at agent startup for efficient monitoring
- Eliminate need for manual storage pool configuration
- Support zero-config storage visualization with backward compatibility
- Clean up mount parsing and remove unused fields
---
 CLAUDE.md                    |  75 +++++++
 Cargo.lock                   |   6 +-
 agent/Cargo.toml             |   2 +-
 agent/src/collectors/disk.rs | 382 ++++++++++++++++++++++++++++++++++-
 dashboard/Cargo.toml         |   2 +-
 shared/Cargo.toml            |   2 +-
 6 files changed, 452 insertions(+), 17 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 938b76e..4e496bf 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -144,6 +144,81 @@ nix-build --no-out-link -E 'with import <nixpkgs> {}; fetchurl {
 - **Workspace builds**: `nix-shell -p openssl pkg-config --run "cargo build --workspace"`
 - **Clean compilation**: Remove `target/` between major changes
 
+## Enhanced Storage Pool Visualization
+
+### Auto-Discovery Architecture
+
+The dashboard uses automatic storage discovery to eliminate manual configuration complexity while providing intelligent storage pool grouping.
+
+### Discovery Process
+
+**At Agent Startup:**
+1. Parse `/proc/mounts` to identify all mounted filesystems
+2. Detect MergerFS pools by analyzing `fuse.mergerfs` mount sources
+3. Identify member disks and potential parity relationships via heuristics
+4. Store discovered storage topology for continuous monitoring
+5. Generate pool-aware metrics with hierarchical relationships
+
+**Continuous Monitoring:**
+- Use stored discovery data for efficient metric collection
+- Monitor individual drives for SMART data, temperature, wear
+- Calculate pool-level health based on member drive status
+- Generate enhanced metrics for dashboard visualization
+
+### Supported Storage Types
+
+**Single Disks:**
+- ext4, xfs, btrfs mounted directly
+- Individual drive monitoring with SMART data
+- Traditional single-disk display for root, boot, etc.
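+
+Both cases are read straight from `/proc/mounts`; entries look like these (illustrative devices and paths):
+
+```
+/dev/nvme0n1p2 / ext4 rw,relatime 0 0
+/mnt/disk1:/mnt/disk2 /srv/media fuse.mergerfs rw,relatime 0 0
+```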
+
+**MergerFS Pools:**
+- Auto-detect from `/proc/mounts` fuse.mergerfs entries
+- Parse source paths to identify member disks (e.g., "/mnt/disk1:/mnt/disk2")
+- Heuristic parity disk detection (sequential device names, "parity" in path)
+- Pool health calculation (healthy/degraded/critical)
+- Hierarchical tree display with data/parity disk grouping
+
+**Future Extensions Ready:**
+- RAID arrays via `/proc/mdstat` parsing
+- ZFS pools via `zpool status` integration
+- LVM logical volumes via `lvs` discovery
+
+### Configuration
+
+```toml
+[collectors.disk]
+enabled = true
+auto_discover = true  # Default: true
+# Optional exclusions for special filesystems
+exclude_mount_points = ["/tmp", "/proc", "/sys", "/dev"]
+exclude_fs_types = ["tmpfs", "devtmpfs", "sysfs", "proc"]
+```
+
+### Display Format
+
+```
+Storage:
+● /srv/media (mergerfs (2+1)):
+  ├─ Pool Status: ● Healthy (3 drives)
+  ├─ Total: ● 63% 2355.2GB/3686.4GB
+  ├─ Data Disks:
+  │  ├─ ● sdb T: 24°C
+  │  └─ ● sdd T: 27°C
+  └─ Parity: ● sdc T: 24°C
+● /:
+  ├─ ● nvme0n1 W: 13%
+  └─ ● 7% 14.5GB/218.5GB
+```
+
+### Implementation Benefits
+
+- **Zero Configuration**: No manual pool definitions required
+- **Always Accurate**: Reflects actual system state automatically
+- **Scales Automatically**: Handles any number of pools without config changes
+- **Backwards Compatible**: Single disks continue working unchanged
+- **Future Ready**: Easy extension for additional storage technologies
+
 ## Important Communication Guidelines
 
 Keep responses concise and focused. Avoid extensive implementation summaries unless requested.
diff --git a/Cargo.lock b/Cargo.lock
index 3693a6c..3649d78 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
 
 [[package]]
 name = "cm-dashboard"
-version = "0.1.98"
+version = "0.1.99"
 dependencies = [
  "anyhow",
  "chrono",
@@ -301,7 +301,7 @@ dependencies = [
 
 [[package]]
 name = "cm-dashboard-agent"
-version = "0.1.98"
+version = "0.1.99"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -324,7 +324,7 @@ dependencies = [
 
 [[package]]
 name = "cm-dashboard-shared"
-version = "0.1.98"
+version = "0.1.99"
 dependencies = [
  "chrono",
  "serde",
diff --git a/agent/Cargo.toml b/agent/Cargo.toml
index 2f3e5d5..b6e6d71 100644
--- a/agent/Cargo.toml
+++ b/agent/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-agent"
-version = "0.1.99"
+version = "0.1.100"
 edition = "2021"
 
 [dependencies]
diff --git a/agent/src/collectors/disk.rs b/agent/src/collectors/disk.rs
index a2f2154..6f7ab10 100644
--- a/agent/src/collectors/disk.rs
+++ b/agent/src/collectors/disk.rs
@@ -5,10 +5,34 @@
 use cm_dashboard_shared::{Metric, MetricValue, Status, StatusTracker, HysteresisThresholds};
 use crate::config::DiskConfig;
 use std::process::Command;
 use std::time::Instant;
+use std::fs;
 use tracing::debug;
 
 use super::{Collector, CollectorError};
 
+/// Mount point information from /proc/mounts
+#[derive(Debug, Clone)]
+struct MountInfo {
+    device: String,      // e.g., "/dev/sda1" or "/mnt/disk1:/mnt/disk2"
+    mount_point: String, // e.g., "/", "/srv/media"
+    fs_type: String,     // e.g., "ext4", "xfs", "fuse.mergerfs"
+}
+
+/// Auto-discovered storage topology
+#[derive(Debug, Clone)]
+struct StorageTopology {
+    single_disks: Vec<MountInfo>,
+    mergerfs_pools: Vec<MergerfsPoolInfo>,
+}
+
+/// MergerFS pool information
+#[derive(Debug, Clone)]
+struct MergerfsPoolInfo {
+    mount_point: String,       // e.g., "/srv/media"
+    data_members: Vec<String>, // e.g., ["/mnt/disk1", "/mnt/disk2"]
+    parity_disks: Vec<String>, // e.g., ["/mnt/parity"]
+}
+
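+// Illustrative value for a pool mounted at /srv/media (hypothetical paths,
+// matching the field examples above):
+//
+//   MergerfsPoolInfo {
+//       mount_point: "/srv/media".to_string(),
+//       data_members: vec!["/mnt/disk1".into(), "/mnt/disk2".into()],
+//       parity_disks: vec!["/mnt/parity".into()],
+//   }
+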
 /// Information about a storage pool (mount point with underlying drives)
 #[derive(Debug, Clone)]
 struct StoragePool {
@@ -70,6 +94,7 @@ pub struct DiskCollector {
     config: DiskConfig,
     temperature_thresholds: HysteresisThresholds,
     detected_devices: std::collections::HashMap<String, Vec<String>>, // mount_point -> devices
+    storage_topology: Option<StorageTopology>, // Auto-discovered storage layout
 }
 
 impl DiskCollector {
@@ -82,12 +107,57 @@
             5.0, // 5°C gap for recovery
         );
 
-        // Detect devices for all configured filesystems at startup
+        // Perform auto-discovery of storage topology
+        let storage_topology = match Self::auto_discover_storage() {
+            Ok(topology) => {
+                debug!("Auto-discovered storage topology: {} single disks, {} mergerfs pools",
+                    topology.single_disks.len(), topology.mergerfs_pools.len());
+                Some(topology)
+            }
+            Err(e) => {
+                debug!("Failed to auto-discover storage topology: {}", e);
+                None
+            }
+        };
+
+        // Detect devices for discovered storage
         let mut detected_devices = std::collections::HashMap::new();
-        for fs_config in &config.filesystems {
-            if fs_config.monitor {
-                if let Ok(devices) = Self::detect_device_for_mount_point_static(&fs_config.mount_point) {
-                    detected_devices.insert(fs_config.mount_point.clone(), devices);
+        if let Some(ref topology) = storage_topology {
+            // Add single disks
+            for disk in &topology.single_disks {
+                if let Ok(devices) = Self::detect_device_for_mount_point_static(&disk.mount_point) {
+                    detected_devices.insert(disk.mount_point.clone(), devices);
+                }
+            }
+
+            // Add mergerfs pools and their members
+            for pool in &topology.mergerfs_pools {
+                // Detect devices for the pool itself
+                if let Ok(devices) = Self::detect_device_for_mount_point_static(&pool.mount_point) {
+                    detected_devices.insert(pool.mount_point.clone(), devices);
+                }
+
+                // Detect devices for member disks
+                for member in &pool.data_members {
+                    if let Ok(devices) = Self::detect_device_for_mount_point_static(member) {
+                        detected_devices.insert(member.clone(), devices);
+                    }
+                }
+
+                // Detect devices for parity disks
+                for parity in &pool.parity_disks {
+                    if let Ok(devices) = Self::detect_device_for_mount_point_static(parity) {
+                        detected_devices.insert(parity.clone(), devices);
+                    }
+                }
+            }
+        } else {
+            // Fallback: use legacy filesystem config detection
+            for fs_config in &config.filesystems {
+                if fs_config.monitor {
+                    if let Ok(devices) = Self::detect_device_for_mount_point_static(&fs_config.mount_point) {
+                        detected_devices.insert(fs_config.mount_point.clone(), devices);
+                    }
                 }
             }
         }
@@ -96,21 +166,313 @@ impl DiskCollector {
             config,
             temperature_thresholds,
             detected_devices,
+            storage_topology,
         }
     }
 
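+    // Note: discovery runs once, at agent startup; collection cycles reuse
+    // the stored topology instead of re-parsing /proc/mounts, so layout
+    // changes are picked up on the next agent restart.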
+    /// Auto-discover storage topology by parsing system information
+    fn auto_discover_storage() -> Result<StorageTopology> {
+        let mounts = Self::parse_proc_mounts()?;
+        let mut single_disks = Vec::new();
+        let mut mergerfs_pools = Vec::new();
+
+        // Filter out unwanted filesystem types and mount points
+        let exclude_fs_types = ["tmpfs", "devtmpfs", "sysfs", "proc", "cgroup", "cgroup2", "devpts"];
+        let exclude_mount_prefixes = ["/proc", "/sys", "/dev", "/tmp", "/run"];
+
+        // First pass: collect mergerfs pools so the membership checks below
+        // do not depend on mount order (member disks are typically mounted
+        // before the pool itself appears in /proc/mounts).
+        for mount in &mounts {
+            if mount.fs_type == "fuse.mergerfs" {
+                let data_members = Self::parse_mergerfs_sources(&mount.device);
+                let parity_disks = Self::detect_parity_disks(&data_members);
+
+                mergerfs_pools.push(MergerfsPoolInfo {
+                    mount_point: mount.mount_point.clone(),
+                    data_members,
+                    parity_disks,
+                });
+
+                debug!("Discovered mergerfs pool at {}", mount.mount_point);
+            }
+        }
+
+        // Second pass: classify the remaining mounts
+        for mount in mounts {
+            // Skip excluded filesystem types
+            if exclude_fs_types.contains(&mount.fs_type.as_str()) {
+                continue;
+            }
+
+            // Skip excluded mount point prefixes
+            if exclude_mount_prefixes.iter().any(|prefix| mount.mount_point.starts_with(prefix)) {
+                continue;
+            }
+
+            match mount.fs_type.as_str() {
+                "fuse.mergerfs" => {
+                    // Already handled in the first pass
+                }
+                "ext4" | "xfs" | "btrfs" | "ntfs" | "vfat" => {
+                    // Check if this mount is part of a mergerfs pool
+                    let is_mergerfs_member = mergerfs_pools.iter()
+                        .any(|pool| pool.data_members.contains(&mount.mount_point) ||
+                            pool.parity_disks.contains(&mount.mount_point));
+
+                    if !is_mergerfs_member {
+                        debug!("Discovered single disk at {}", mount.mount_point);
+                        single_disks.push(mount);
+                    }
+                }
+                _ => {
+                    debug!("Skipping unsupported filesystem type: {}", mount.fs_type);
+                }
+            }
+        }
+
+        Ok(StorageTopology {
+            single_disks,
+            mergerfs_pools,
+        })
+    }
+
+    /// Parse /proc/mounts to get all mount information
+    fn parse_proc_mounts() -> Result<Vec<MountInfo>> {
+        let mounts_content = fs::read_to_string("/proc/mounts")?;
+        let mut mounts = Vec::new();
+
+        for line in mounts_content.lines() {
+            let parts: Vec<&str> = line.split_whitespace().collect();
+            if parts.len() >= 3 {
+                mounts.push(MountInfo {
+                    device: parts[0].to_string(),
+                    mount_point: parts[1].to_string(),
+                    fs_type: parts[2].to_string(),
+                });
+            }
+        }
+
+        Ok(mounts)
+    }
+
+    /// Parse mergerfs source string to extract member paths
+    fn parse_mergerfs_sources(source: &str) -> Vec<String> {
+        // MergerFS source format: "/mnt/disk1:/mnt/disk2:/mnt/disk3"
+        source.split(':')
+            .map(|s| s.trim().to_string())
+            .filter(|s| !s.is_empty())
+            .collect()
+    }
+
+    /// Detect potential parity disks based on data member heuristics
+    fn detect_parity_disks(data_members: &[String]) -> Vec<String> {
+        let mut parity_disks = Vec::new();
+
+        // Heuristic 1: Look for mount points with "parity" in the name
+        if let Ok(mounts) = Self::parse_proc_mounts() {
+            for mount in mounts {
+                if mount.mount_point.to_lowercase().contains("parity") &&
+                    (mount.fs_type == "xfs" || mount.fs_type == "ext4") {
+                    debug!("Detected parity disk by name: {}", mount.mount_point);
+                    parity_disks.push(mount.mount_point);
+                }
+            }
+        }
+
+        // Heuristic 2: Look for other xfs/ext4 mounts under the data members'
+        // shared base directory. If data members are /mnt/disk1 and /mnt/disk2,
+        // any /mnt/* mount that is not a data member is treated as a parity
+        // candidate.
+        if parity_disks.is_empty() {
+            if let Some(pattern) = Self::extract_mount_pattern(data_members) {
+                if let Ok(mounts) = Self::parse_proc_mounts() {
+                    for mount in mounts {
+                        if mount.mount_point.starts_with(&pattern) &&
+                            !data_members.contains(&mount.mount_point) &&
+                            (mount.fs_type == "xfs" || mount.fs_type == "ext4") {
+                            debug!("Detected parity disk by pattern: {}", mount.mount_point);
+                            parity_disks.push(mount.mount_point);
+                        }
+                    }
+                }
+            }
+        }
+
+        parity_disks
+    }
+
+    /// Extract the common base directory from data member mount points
+    fn extract_mount_pattern(data_members: &[String]) -> Option<String> {
+        if data_members.is_empty() {
+            return None;
+        }
+
+        // Find the common base directory (e.g., "/mnt/" from "/mnt/disk1", "/mnt/disk2")
+        let first = &data_members[0];
+        if let Some(last_slash) = first.rfind('/') {
+            let base = &first[..last_slash + 1]; // Include the slash
+
+            // Check if all members share this base
+            if data_members.iter().all(|member| member.starts_with(base)) {
+                return Some(base.to_string());
+            }
+        }
+
+        None
+    }
+
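+    // Worked example for the heuristics above (hypothetical mounts): with
+    // data_members = ["/mnt/disk1", "/mnt/disk2"], extract_mount_pattern
+    // returns Some("/mnt/"), so an xfs/ext4 mount such as "/mnt/disk3" that
+    // does not appear in the pool's source list would be reported as a
+    // parity candidate by heuristic 2.
+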
     /// Calculate disk temperature status using hysteresis thresholds
     fn calculate_temperature_status(&self, metric_name: &str, temperature: f32, status_tracker: &mut StatusTracker) -> Status {
         status_tracker.calculate_with_hysteresis(metric_name, temperature, &self.temperature_thresholds)
     }
 
-    /// Get configured storage pools with individual drive information
+    /// Get storage pools using auto-discovered topology or fallback to configuration
     fn get_configured_storage_pools(&self) -> Result<Vec<StoragePool>> {
+        if let Some(ref topology) = self.storage_topology {
+            self.get_auto_discovered_storage_pools(topology)
+        } else {
+            self.get_legacy_configured_storage_pools()
+        }
+    }
+
+    /// Get storage pools from auto-discovered topology
+    fn get_auto_discovered_storage_pools(&self, topology: &StorageTopology) -> Result<Vec<StoragePool>> {
+        let mut storage_pools = Vec::new();
+
+        // Process single disks
+        for disk_info in &topology.single_disks {
+            if let Ok((total_bytes, used_bytes)) = self.get_filesystem_info(&disk_info.mount_point) {
+                let available_bytes = total_bytes - used_bytes;
+                let usage_percent = if total_bytes > 0 {
+                    (used_bytes as f64 / total_bytes as f64) * 100.0
+                } else { 0.0 };
+
+                let size = self.bytes_to_human_readable(total_bytes);
+                let used = self.bytes_to_human_readable(used_bytes);
+                let available = self.bytes_to_human_readable(available_bytes);
+
+                let device_names = self.detected_devices.get(&disk_info.mount_point).cloned().unwrap_or_default();
+                let underlying_drives = self.get_drive_info_for_devices(&device_names)?;
+
+                // Generate simple name from mount point
+                let name = if disk_info.mount_point == "/" {
+                    "root".to_string()
+                } else {
+                    disk_info.mount_point.trim_start_matches('/').replace('/', "_")
+                };
+
+                storage_pools.push(StoragePool {
+                    name,
+                    mount_point: disk_info.mount_point.clone(),
+                    filesystem: disk_info.fs_type.clone(),
+                    pool_type: StoragePoolType::Single,
+                    size,
+                    used,
+                    available,
+                    usage_percent: usage_percent as f32,
+                    underlying_drives,
+                    pool_health: PoolHealth::Healthy,
+                });
+
+                debug!("Auto-discovered single disk: {} at {}", disk_info.fs_type, disk_info.mount_point);
+            }
+        }
+
+        // Process mergerfs pools
+        for pool_info in &topology.mergerfs_pools {
+            if let Ok((total_bytes, used_bytes)) = self.get_filesystem_info(&pool_info.mount_point) {
+                let available_bytes = total_bytes - used_bytes;
+                let usage_percent = if total_bytes > 0 {
+                    (used_bytes as f64 / total_bytes as f64) * 100.0
+                } else { 0.0 };
+
+                let size = self.bytes_to_human_readable(total_bytes);
+                let used = self.bytes_to_human_readable(used_bytes);
+                let available = self.bytes_to_human_readable(available_bytes);
+
+                // Collect all member and parity drives
+                let mut all_drives = Vec::new();
+
+                // Add data member drives
+                for member in &pool_info.data_members {
+                    if let Some(devices) = self.detected_devices.get(member) {
+                        all_drives.extend(devices.clone());
+                    }
+                }
+
+                // Add parity drives
+                for parity in &pool_info.parity_disks {
+                    if let Some(devices) = self.detected_devices.get(parity) {
+                        all_drives.extend(devices.clone());
+                    }
+                }
+
+                let underlying_drives = self.get_drive_info_for_devices(&all_drives)?;
+
+                // Calculate pool health
+                let pool_health = self.calculate_mergerfs_pool_health(&pool_info.data_members, &pool_info.parity_disks, &underlying_drives);
+
+                // Generate pool name from mount point
+                let name = pool_info.mount_point.trim_start_matches('/').replace('/', "_");
+
+                storage_pools.push(StoragePool {
+                    name,
+                    mount_point: pool_info.mount_point.clone(),
+                    filesystem: "fuse.mergerfs".to_string(),
+                    pool_type: StoragePoolType::MergerfsPool {
+                        data_disks: pool_info.data_members.iter()
+                            .filter_map(|member| self.detected_devices.get(member).and_then(|devices| devices.first().cloned()))
+                            .collect(),
+                        parity_disks: pool_info.parity_disks.iter()
+                            .filter_map(|parity| self.detected_devices.get(parity).and_then(|devices| devices.first().cloned()))
+                            .collect(),
+                    },
+                    size,
+                    used,
+                    available,
+                    usage_percent: usage_percent as f32,
+                    underlying_drives,
+                    pool_health,
+                });
+
+                debug!("Auto-discovered mergerfs pool: {} with {} data + {} parity disks",
+                    pool_info.mount_point, pool_info.data_members.len(), pool_info.parity_disks.len());
+            }
+        }
+
+        Ok(storage_pools)
+    }
+
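+    // Illustrative outcome (hypothetical 2+1 pool): with data disks sdb and
+    // sdd and parity disk sdc, one failed SMART status on any single member
+    // yields Degraded; failures on two or more members yield Critical.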
+    /// Calculate pool health specifically for mergerfs pools
+    fn calculate_mergerfs_pool_health(&self, data_members: &[String], parity_disks: &[String], drives: &[DriveInfo]) -> PoolHealth {
+        // Get device names for data and parity drives
+        let mut data_device_names = Vec::new();
+        let mut parity_device_names = Vec::new();
+
+        for member in data_members {
+            if let Some(devices) = self.detected_devices.get(member) {
+                data_device_names.extend(devices.clone());
+            }
+        }
+
+        for parity in parity_disks {
+            if let Some(devices) = self.detected_devices.get(parity) {
+                parity_device_names.extend(devices.clone());
+            }
+        }
+
+        let failed_data = drives.iter()
+            .filter(|d| data_device_names.contains(&d.device) && d.health_status != "PASSED")
+            .count();
+        let failed_parity = drives.iter()
+            .filter(|d| parity_device_names.contains(&d.device) && d.health_status != "PASSED")
+            .count();
+
+        match (failed_data, failed_parity) {
+            (0, 0) => PoolHealth::Healthy,
+            (1, 0) => PoolHealth::Degraded, // Can recover with parity
+            (0, 1) => PoolHealth::Degraded, // Lost parity protection
+            _ => PoolHealth::Critical,      // Multiple failures
+        }
+    }
+
+    /// Fallback to legacy configuration-based storage pools
+    fn get_legacy_configured_storage_pools(&self) -> Result<Vec<StoragePool>> {
         let mut storage_pools = Vec::new();
         let mut processed_pools = std::collections::HashSet::new();
 
-        // First pass: Create enhanced pools (mergerfs, etc.)
+        // Legacy implementation: use filesystem configuration
         for fs_config in &self.config.filesystems {
             if !fs_config.monitor {
                 continue;
             }
@@ -147,9 +509,7 @@
             let available_bytes = total_bytes - used_bytes;
             let usage_percent = if total_bytes > 0 {
                 (used_bytes as f64 / total_bytes as f64) * 100.0
-            } else {
-                0.0
-            };
+            } else { 0.0 };
 
             // Convert bytes to human-readable format
             let size = self.bytes_to_human_readable(total_bytes);
@@ -177,7 +537,7 @@
             });
 
             debug!(
-                "Storage pool '{}' ({:?}) at {} with {} drives, health: {:?}",
+                "Legacy configured storage pool '{}' ({:?}) at {} with {} drives, health: {:?}",
                 fs_config.name, pool_type, fs_config.mount_point, drive_count, pool_health
             );
         }
diff --git a/dashboard/Cargo.toml b/dashboard/Cargo.toml
index df5a9c6..82cf404 100644
--- a/dashboard/Cargo.toml
+++ b/dashboard/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard"
-version = "0.1.99"
+version = "0.1.100"
 edition = "2021"
 
 [dependencies]
diff --git a/shared/Cargo.toml b/shared/Cargo.toml
index 6545a77..e1c041a 100644
--- a/shared/Cargo.toml
+++ b/shared/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-shared"
-version = "0.1.99"
+version = "0.1.100"
 edition = "2021"
 
 [dependencies]