diff --git a/agent/src/collectors/disk.rs b/agent/src/collectors/disk.rs index 8081857..e434ef4 100644 --- a/agent/src/collectors/disk.rs +++ b/agent/src/collectors/disk.rs @@ -3,25 +3,33 @@ use async_trait::async_trait; use cm_dashboard_shared::{Metric, MetricValue, Status, StatusTracker, HysteresisThresholds}; use crate::config::DiskConfig; -use std::fs; -use std::path::Path; use std::process::Command; use std::time::Instant; use tracing::debug; use super::{Collector, CollectorError}; -/// Information about a mounted disk +/// Information about a storage pool (mount point with underlying drives) #[derive(Debug, Clone)] -struct MountedDisk { - device: String, // e.g., "/dev/nvme0n1p1" - physical_device: String, // e.g., "/dev/nvme0n1" - mount_point: String, // e.g., "/" - filesystem: String, // e.g., "ext4" - size: String, // e.g., "120G" - used: String, // e.g., "45G" - available: String, // e.g., "75G" - usage_percent: f32, // e.g., 38.5 +struct StoragePool { + name: String, // e.g., "steampool", "root" + mount_point: String, // e.g., "/mnt/steampool", "/" + filesystem: String, // e.g., "mergerfs", "ext4", "zfs", "btrfs" + storage_type: String, // e.g., "mergerfs", "single", "raid", "zfs" + size: String, // e.g., "2.5TB" + used: String, // e.g., "2.1TB" + available: String, // e.g., "400GB" + usage_percent: f32, // e.g., 85.0 + underlying_drives: Vec, // Individual physical drives +} + +/// Information about an individual physical drive +#[derive(Debug, Clone)] +struct DriveInfo { + device: String, // e.g., "sda", "nvme0n1" + health_status: String, // e.g., "PASSED", "FAILED" + temperature: Option, // e.g., 45.0°C + wear_level: Option, // e.g., 12.0% (for SSDs) } /// Disk usage collector for monitoring filesystem sizes @@ -51,96 +59,176 @@ impl DiskCollector { status_tracker.calculate_with_hysteresis(metric_name, temperature, &self.temperature_thresholds) } - /// Resolve UUID to actual device path - fn resolve_uuid_to_device(&self, uuid: &str) -> Result { - let 
uuid_path = format!("/dev/disk/by-uuid/{}", uuid); - - if Path::new(&uuid_path).exists() { - match fs::read_link(&uuid_path) { - Ok(target) => { - // Convert relative path to absolute - if target.is_relative() { - let parent = Path::new(&uuid_path).parent().unwrap(); - let resolved = parent.join(&target); - match resolved.canonicalize() { - Ok(canonical) => Ok(canonical.to_string_lossy().to_string()), - Err(_) => Ok(target.to_string_lossy().to_string()), - } - } else { - Ok(target.to_string_lossy().to_string()) - } - } - Err(e) => Err(anyhow::anyhow!("Failed to resolve UUID {}: {}", uuid, e)), - } - } else { - Err(anyhow::anyhow!("UUID {} not found in /dev/disk/by-uuid/", uuid)) - } - } - /// Get configured filesystems from UUIDs - fn get_configured_filesystems(&self) -> Result> { - let mut configured_disks = Vec::new(); + /// Get configured storage pools with individual drive information + fn get_configured_storage_pools(&self) -> Result> { + let mut storage_pools = Vec::new(); for fs_config in &self.config.filesystems { if !fs_config.monitor { continue; } - // Resolve UUID to device - match self.resolve_uuid_to_device(&fs_config.uuid) { - Ok(device_path) => { - // Get filesystem stats for the mount point - match self.get_filesystem_info(&fs_config.mount_point) { - Ok((total_bytes, used_bytes)) => { - let available_bytes = total_bytes - used_bytes; - let usage_percent = if total_bytes > 0 { - (used_bytes as f64 / total_bytes as f64) * 100.0 - } else { - 0.0 - }; + // Get filesystem stats for the mount point + match self.get_filesystem_info(&fs_config.mount_point) { + Ok((total_bytes, used_bytes)) => { + let available_bytes = total_bytes - used_bytes; + let usage_percent = if total_bytes > 0 { + (used_bytes as f64 / total_bytes as f64) * 100.0 + } else { + 0.0 + }; - // Convert bytes to human-readable format - let size = self.bytes_to_human_readable(total_bytes); - let used = self.bytes_to_human_readable(used_bytes); - let available = 
self.bytes_to_human_readable(available_bytes); + // Convert bytes to human-readable format + let size = self.bytes_to_human_readable(total_bytes); + let used = self.bytes_to_human_readable(used_bytes); + let available = self.bytes_to_human_readable(available_bytes); - // Get physical device for SMART monitoring - let physical_device = self.get_physical_device(&device_path)?; + // Get individual drive information + let underlying_drives = self.get_drive_info_for_devices(&fs_config.underlying_devices)?; - configured_disks.push(MountedDisk { - device: device_path.clone(), - physical_device, - mount_point: fs_config.mount_point.clone(), - filesystem: fs_config.fs_type.clone(), - size, - used, - available, - usage_percent: usage_percent as f32, - }); + storage_pools.push(StoragePool { + name: fs_config.name.clone(), + mount_point: fs_config.mount_point.clone(), + filesystem: fs_config.fs_type.clone(), + storage_type: fs_config.storage_type.clone(), + size, + used, + available, + usage_percent: usage_percent as f32, + underlying_drives, + }); - debug!( - "Configured filesystem '{}' (UUID: {}) mounted at {} using {}", - fs_config.name, fs_config.uuid, fs_config.mount_point, device_path - ); - } - Err(e) => { - debug!( - "Failed to get filesystem info for configured filesystem '{}': {}", - fs_config.name, e - ); - } - } + debug!( + "Storage pool '{}' ({}) at {} with {} underlying drives", + fs_config.name, fs_config.storage_type, fs_config.mount_point, fs_config.underlying_devices.len() + ); } Err(e) => { debug!( - "Failed to resolve UUID for configured filesystem '{}': {}", + "Failed to get filesystem info for storage pool '{}': {}", fs_config.name, e ); } } } - Ok(configured_disks) + Ok(storage_pools) + } + + /// Get drive information for a list of device names + fn get_drive_info_for_devices(&self, device_names: &[String]) -> Result> { + let mut drives = Vec::new(); + + for device_name in device_names { + let device_path = format!("/dev/{}", device_name); + + // Get 
SMART data for this drive + let (health_status, temperature, wear_level) = self.get_smart_data(&device_path); + + drives.push(DriveInfo { + device: device_name.clone(), + health_status: health_status.clone(), + temperature, + wear_level, + }); + + debug!( + "Drive info for {}: health={}, temp={:?}°C, wear={:?}%", + device_name, health_status, temperature, wear_level + ); + } + + Ok(drives) + } + + /// Get SMART data for a drive (health, temperature, wear level) + fn get_smart_data(&self, device_path: &str) -> (String, Option, Option) { + // Try to get SMART data using smartctl + let output = Command::new("sudo") + .arg("smartctl") + .arg("-a") + .arg(device_path) + .output(); + + match output { + Ok(result) if result.status.success() => { + let stdout = String::from_utf8_lossy(&result.stdout); + + // Parse health status + let health = if stdout.contains("PASSED") { + "PASSED".to_string() + } else if stdout.contains("FAILED") { + "FAILED".to_string() + } else { + "UNKNOWN".to_string() + }; + + // Parse temperature (look for various temperature indicators) + let temperature = self.parse_temperature_from_smart(&stdout); + + // Parse wear level (for SSDs) + let wear_level = self.parse_wear_level_from_smart(&stdout); + + (health, temperature, wear_level) + } + _ => { + debug!("Failed to get SMART data for {}", device_path); + ("UNKNOWN".to_string(), None, None) + } + } + } + + /// Parse temperature from SMART output + fn parse_temperature_from_smart(&self, smart_output: &str) -> Option { + for line in smart_output.lines() { + // Look for temperature in various formats + if line.contains("Temperature_Celsius") || line.contains("Temperature") { + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() >= 10 { + if let Ok(temp) = parts[9].parse::() { + return Some(temp); + } + } + } + // NVMe drives might show temperature differently + if line.contains("temperature:") { + if let Some(temp_part) = line.split("temperature:").nth(1) { + if let 
Some(temp_str) = temp_part.split_whitespace().next() { + if let Ok(temp) = temp_str.parse::() { + return Some(temp); + } + } + } + } + } + None + } + + /// Parse wear level from SMART output (SSD wear leveling) + fn parse_wear_level_from_smart(&self, smart_output: &str) -> Option { + for line in smart_output.lines() { + // Look for wear leveling indicators + if line.contains("Wear_Leveling_Count") || line.contains("Media_Wearout_Indicator") { + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() >= 10 { + if let Ok(wear) = parts[9].parse::() { + return Some(100.0 - wear); // Convert to percentage used + } + } + } + // NVMe drives might show percentage used directly + if line.contains("Percentage Used:") { + if let Some(wear_part) = line.split("Percentage Used:").nth(1) { + if let Some(wear_str) = wear_part.split('%').next() { + if let Ok(wear) = wear_str.trim().parse::() { + return Some(wear); + } + } + } + } + } + None } /// Convert bytes to human-readable format @@ -218,79 +306,6 @@ impl DiskCollector { Ok((total_bytes, used_bytes)) } - - - /// Get the physical device for a given device (resolves symlinks, gets parent device) - fn get_physical_device(&self, device: &str) -> Result { - // For NVMe: /dev/nvme0n1p1 -> /dev/nvme0n1 - if device.contains("nvme") && device.contains("p") { - if let Some(base) = device.split('p').next() { - return Ok(base.to_string()); - } - } - - // For SATA: /dev/sda1 -> /dev/sda - if device.starts_with("/dev/sd") && device.len() > 8 { - return Ok(device[..8].to_string()); // Keep /dev/sdX - } - - // For VirtIO: /dev/vda1 -> /dev/vda - if device.starts_with("/dev/vd") && device.len() > 8 { - return Ok(device[..8].to_string()); - } - - // If no partition detected, return as-is - Ok(device.to_string()) - } - - /// Get SMART health for a specific physical device - fn get_smart_health(&self, device: &str) -> (String, f32) { - if let Ok(output) = Command::new("sudo") - .arg("smartctl") - .arg("-H") - .arg(device) - 
.output() - { - if output.status.success() { - let output_str = String::from_utf8_lossy(&output.stdout); - let health_status = if output_str.contains("PASSED") { - "PASSED" - } else if output_str.contains("FAILED") { - "FAILED" - } else { - "UNKNOWN" - }; - - // Try to get temperature - let temperature = if let Ok(temp_output) = Command::new("sudo") - .arg("smartctl") - .arg("-A") - .arg(device) - .output() - { - let temp_str = String::from_utf8_lossy(&temp_output.stdout); - // Look for temperature in SMART attributes - for line in temp_str.lines() { - if line.contains("Temperature") && line.contains("Celsius") { - if let Some(temp_part) = line.split_whitespace().nth(9) { - if let Ok(temp) = temp_part.parse::() { - return (health_status.to_string(), temp); - } - } - } - } - 0.0 - } else { - 0.0 - }; - - return (health_status.to_string(), temperature); - } - } - - ("UNKNOWN".to_string(), 0.0) - } - /// Calculate status based on usage percentage fn calculate_usage_status(&self, used_bytes: u64, total_bytes: u64) -> Status { if total_bytes == 0 { @@ -350,197 +365,168 @@ impl Collector for DiskCollector { async fn collect(&self, status_tracker: &mut StatusTracker) -> Result, CollectorError> { let start_time = Instant::now(); - debug!("Collecting multi-disk metrics"); + debug!("Collecting storage pool and individual drive metrics"); let mut metrics = Vec::new(); - // Use UUID-based configured filesystems - let mounted_disks = match self.get_configured_filesystems() { - Ok(configured) => { - debug!("Using UUID-based filesystems: {} found", configured.len()); - configured + // Get configured storage pools with individual drive data + let storage_pools = match self.get_configured_storage_pools() { + Ok(pools) => { + debug!("Found {} storage pools", pools.len()); + pools } Err(e) => { - debug!("Failed to get configured filesystems: {}", e); + debug!("Failed to get storage pools: {}", e); Vec::new() } }; - // Process discovered/configured disks - if !mounted_disks.is_empty() 
{ - debug!("Found {} mounted disks", mounted_disks.len()); + // Generate metrics for each storage pool and its underlying drives + for storage_pool in &storage_pools { + let timestamp = chrono::Utc::now().timestamp() as u64; - // Group disks by physical device to avoid duplicate SMART checks - let mut physical_devices: std::collections::HashMap> = - std::collections::HashMap::new(); - for disk in &mounted_disks { - physical_devices - .entry(disk.physical_device.clone()) - .or_insert_with(Vec::new) - .push(disk); - } + // Storage pool overall metrics + let pool_name = &storage_pool.name; + + // Parse size strings to get actual values for calculations + let size_gb = self.parse_size_to_gb(&storage_pool.size); + let used_gb = self.parse_size_to_gb(&storage_pool.used); + let avail_gb = self.parse_size_to_gb(&storage_pool.available); - // Generate metrics for each mounted disk - for (disk_index, disk) in mounted_disks.iter().enumerate() { - let timestamp = chrono::Utc::now().timestamp() as u64; + // Calculate status based on configured thresholds + let pool_status = if storage_pool.usage_percent >= self.config.usage_critical_percent { + Status::Critical + } else if storage_pool.usage_percent >= self.config.usage_warning_percent { + Status::Warning + } else { + Status::Ok + }; - // Always use index for metric names to maintain dashboard compatibility - let disk_name = disk_index.to_string(); + // Storage pool info metrics + metrics.push(Metric { + name: format!("disk_{}_mount_point", pool_name), + value: MetricValue::String(storage_pool.mount_point.clone()), + unit: None, + description: Some(format!("Mount: {}", storage_pool.mount_point)), + status: Status::Ok, + timestamp, + }); - // Parse size strings to get actual values for calculations - let size_gb = self.parse_size_to_gb(&disk.size); - let used_gb = self.parse_size_to_gb(&disk.used); - let avail_gb = self.parse_size_to_gb(&disk.available); + metrics.push(Metric { + name: format!("disk_{}_filesystem", pool_name), + 
value: MetricValue::String(storage_pool.filesystem.clone()), + unit: None, + description: Some(format!("FS: {}", storage_pool.filesystem)), + status: Status::Ok, + timestamp, + }); - // Calculate status based on configured thresholds - let status = if disk.usage_percent >= self.config.usage_critical_percent { - Status::Critical - } else if disk.usage_percent >= self.config.usage_warning_percent { - Status::Warning - } else { - Status::Ok - }; + metrics.push(Metric { + name: format!("disk_{}_storage_type", pool_name), + value: MetricValue::String(storage_pool.storage_type.clone()), + unit: None, + description: Some(format!("Type: {}", storage_pool.storage_type)), + status: Status::Ok, + timestamp, + }); - // Device and mount point info + // Storage pool size metrics + metrics.push(Metric { + name: format!("disk_{}_total_gb", pool_name), + value: MetricValue::Float(size_gb), + unit: Some("GB".to_string()), + description: Some(format!("Total: {}", storage_pool.size)), + status: Status::Ok, + timestamp, + }); + + metrics.push(Metric { + name: format!("disk_{}_used_gb", pool_name), + value: MetricValue::Float(used_gb), + unit: Some("GB".to_string()), + description: Some(format!("Used: {}", storage_pool.used)), + status: pool_status, + timestamp, + }); + + metrics.push(Metric { + name: format!("disk_{}_available_gb", pool_name), + value: MetricValue::Float(avail_gb), + unit: Some("GB".to_string()), + description: Some(format!("Available: {}", storage_pool.available)), + status: Status::Ok, + timestamp, + }); + + metrics.push(Metric { + name: format!("disk_{}_usage_percent", pool_name), + value: MetricValue::Float(storage_pool.usage_percent), + unit: Some("%".to_string()), + description: Some(format!("Usage: {:.1}%", storage_pool.usage_percent)), + status: pool_status, + timestamp, + }); + + // Individual drive metrics for this storage pool + for drive in &storage_pool.underlying_drives { + // Drive health status metrics.push(Metric { - name: format!("disk_{}_device", 
disk_name), - value: MetricValue::String(disk.device.clone()), + name: format!("disk_{}_{}_health", pool_name, drive.device), + value: MetricValue::String(drive.health_status.clone()), unit: None, - description: Some(format!("Device: {}", disk.device)), - status: Status::Ok, + description: Some(format!("{}: {}", drive.device, drive.health_status)), + status: if drive.health_status == "PASSED" { Status::Ok } + else if drive.health_status == "FAILED" { Status::Critical } + else { Status::Unknown }, timestamp, }); - metrics.push(Metric { - name: format!("disk_{}_mount_point", disk_name), - value: MetricValue::String(disk.mount_point.clone()), - unit: None, - description: Some(format!("Mount: {}", disk.mount_point)), - status: Status::Ok, - timestamp, - }); - - metrics.push(Metric { - name: format!("disk_{}_filesystem", disk_name), - value: MetricValue::String(disk.filesystem.clone()), - unit: None, - description: Some(format!("FS: {}", disk.filesystem)), - status: Status::Ok, - timestamp, - }); - - // Size metrics - metrics.push(Metric { - name: format!("disk_{}_total_gb", disk_name), - value: MetricValue::Float(size_gb), - unit: Some("GB".to_string()), - description: Some(format!("Total: {}", disk.size)), - status: Status::Ok, - timestamp, - }); - - metrics.push(Metric { - name: format!("disk_{}_used_gb", disk_name), - value: MetricValue::Float(used_gb), - unit: Some("GB".to_string()), - description: Some(format!("Used: {}", disk.used)), - status, - timestamp, - }); - - metrics.push(Metric { - name: format!("disk_{}_available_gb", disk_name), - value: MetricValue::Float(avail_gb), - unit: Some("GB".to_string()), - description: Some(format!("Available: {}", disk.available)), - status: Status::Ok, - timestamp, - }); - - metrics.push(Metric { - name: format!("disk_{}_usage_percent", disk_name), - value: MetricValue::Float(disk.usage_percent), - unit: Some("%".to_string()), - description: Some(format!("Usage: {:.1}%", disk.usage_percent)), - status, - timestamp, - }); - 
- // Physical device name (for SMART health grouping) - let physical_device_name = disk - .physical_device - .strip_prefix("/dev/") - .unwrap_or(&disk.physical_device); - - metrics.push(Metric { - name: format!("disk_{}_physical_device", disk_name), - value: MetricValue::String(physical_device_name.to_string()), - unit: None, - description: Some(format!("Physical: {}", physical_device_name)), - status: Status::Ok, - timestamp, - }); - } - - // Add SMART health metrics for each unique physical device - for (physical_device, _disks) in physical_devices { - let (health_status, temperature) = self.get_smart_health(&physical_device); - let device_name = physical_device - .strip_prefix("/dev/") - .unwrap_or(&physical_device); - let timestamp = chrono::Utc::now().timestamp() as u64; - - let health_status_enum = match health_status.as_str() { - "PASSED" => Status::Ok, - "FAILED" => Status::Critical, - _ => Status::Unknown, - }; - + // Drive temperature + if let Some(temp) = drive.temperature { + let temp_status = self.calculate_temperature_status( + &format!("disk_{}_{}_temperature", pool_name, drive.device), + temp, + status_tracker + ); + metrics.push(Metric { - name: format!("disk_smart_{}_health", device_name), - value: MetricValue::String(health_status.clone()), - unit: None, - description: Some(format!("SMART Health: {}", health_status)), - status: health_status_enum, + name: format!("disk_{}_{}_temperature", pool_name, drive.device), + value: MetricValue::Float(temp), + unit: Some("°C".to_string()), + description: Some(format!("{}: {:.0}°C", drive.device, temp)), + status: temp_status, timestamp, }); - - if temperature > 0.0 { - let metric_name = format!("disk_smart_{}_temperature", device_name); - let temp_status = self.calculate_temperature_status(&metric_name, temperature, status_tracker); - - metrics.push(Metric { - name: format!("disk_smart_{}_temperature", device_name), - value: MetricValue::Float(temperature), - unit: Some("°C".to_string()), - description: 
Some(format!("Temperature: {:.0}°C", temperature)), - status: temp_status, - timestamp, - }); - } } - // Add disk count metric - metrics.push(Metric { - name: "disk_count".to_string(), - value: MetricValue::Integer(mounted_disks.len() as i64), - unit: None, - description: Some(format!("Total mounted disks: {}", mounted_disks.len())), - status: Status::Ok, - timestamp: chrono::Utc::now().timestamp() as u64, - }); - } else { - // No disks configured - add zero count metric - metrics.push(Metric { - name: "disk_count".to_string(), - value: MetricValue::Integer(0), - unit: None, - description: Some("No disks configured for monitoring".to_string()), - status: Status::Warning, - timestamp: chrono::Utc::now().timestamp() as u64, - }); + // Drive wear level (for SSDs) + if let Some(wear) = drive.wear_level { + let wear_status = if wear >= 90.0 { Status::Critical } + else if wear >= 80.0 { Status::Warning } + else { Status::Ok }; + + metrics.push(Metric { + name: format!("disk_{}_{}_wear_percent", pool_name, drive.device), + value: MetricValue::Float(wear), + unit: Some("%".to_string()), + description: Some(format!("{}: {:.0}% wear", drive.device, wear)), + status: wear_status, + timestamp, + }); + } + } } + // Add storage pool count metric + metrics.push(Metric { + name: "disk_count".to_string(), + value: MetricValue::Integer(storage_pools.len() as i64), + unit: None, + description: Some(format!("Total storage pools: {}", storage_pools.len())), + status: Status::Ok, + timestamp: chrono::Utc::now().timestamp() as u64, + }); + // Monitor /tmp directory size (keep existing functionality) match self.get_directory_size("/tmp") { Ok(tmp_size_bytes) => { diff --git a/agent/src/collectors/systemd.rs b/agent/src/collectors/systemd.rs index 1cb6ea7..ee17116 100644 --- a/agent/src/collectors/systemd.rs +++ b/agent/src/collectors/systemd.rs @@ -7,11 +7,14 @@ use std::time::Instant; use tracing::debug; use super::{Collector, CollectorError}; +use crate::config::SystemdConfig; /// 
Systemd collector for monitoring systemd services pub struct SystemdCollector { /// Cached state with thread-safe interior mutability state: RwLock, + /// Configuration for service monitoring + config: SystemdConfig, } /// Internal state for service caching @@ -32,7 +35,7 @@ struct ServiceCacheState { } impl SystemdCollector { - pub fn new() -> Self { + pub fn new(config: SystemdConfig) -> Self { Self { state: RwLock::new(ServiceCacheState { monitored_services: Vec::new(), @@ -42,6 +45,7 @@ impl SystemdCollector { last_nginx_check_time: None, nginx_check_interval_seconds: 30, // 30 seconds for nginx sites }), + config, } } @@ -128,13 +132,8 @@ impl SystemdCollector { .arg("--plain") .output()?; - // Get hostname to determine which user to check - let hostname = gethostname::gethostname().to_string_lossy().to_string(); - let target_user = match hostname.as_str() { - "steambox" | "cmbox" => "cm", - "simonbox" => "simon", - _ => "cm", // default to cm for unknown hosts - }; + // Use configured user mapping instead of hardcoded hostname logic + let target_user = &self.config.host_user_mapping; // Also get user unit files (user-level services) for target user let user_unit_files_output = Command::new("sudo") @@ -183,68 +182,9 @@ impl SystemdCollector { }; let mut services = Vec::new(); - // Skip setup/certificate services that don't need monitoring (from legacy) - let excluded_services = [ - "mosquitto-certs", - "immich-setup", - "phpfpm-kryddorten", - "phpfpm-mariehall2", - "acme-haasp.net", - "acme-selfsigned-haasp", - "borgbackup", - "haasp-site-deploy", - "mosquitto-backup", - "nginx-config-reload", - "sshd-keygen", - "sshd-unix-local@", - "sshd@", - "docker-prune", - "docker-registry-gar", - "ark-permissions", - ]; - - // Define patterns for services we want to monitor (from legacy) - let service_name_filters = [ - // Web applications - "gitea", - "immich", - "vaultwarden", - "unifi", - "wordpress", - "nginx", - "httpd", - // Databases - "postgresql", - "mysql", - 
"mariadb", - "redis", - "mongodb", - "mongod", - // Backup and storage - "borg", - "rclone", - // Container runtimes - "docker", - // CI/CD services - "gitea-actions", - "gitea-runner", - "actions-runner", - // Network services - "sshd", - "dnsmasq", - // MQTT and IoT services - "mosquitto", - "mqtt", - // PHP-FPM services - "phpfpm", - // Home automation - "haasp", - // Backup services - "backup", - // Game servers - "ark", - "sunshine", - ]; + // Use configuration instead of hardcoded values + let excluded_services = &self.config.excluded_services; + let service_name_filters = &self.config.service_name_filters; // Parse both unit files and loaded units let mut all_service_names = std::collections::HashSet::new(); @@ -295,7 +235,7 @@ impl SystemdCollector { // Skip excluded services first let mut is_excluded = false; - for excluded in &excluded_services { + for excluded in excluded_services { if service_name.contains(excluded) { debug!( "EXCLUDING service '{}' because it matches pattern '{}'", @@ -312,7 +252,7 @@ impl SystemdCollector { } // Check if this service matches our filter patterns - for pattern in &service_name_filters { + for pattern in service_name_filters { if service_name.contains(pattern) || pattern.contains(service_name) { debug!( "INCLUDING service '{}' because it matches pattern '{}'", @@ -411,22 +351,21 @@ impl SystemdCollector { } } - /// Get service disk usage - simple and deterministic + /// Get service disk usage - simplified and configuration-driven fn get_service_disk_usage(&self, service: &str) -> Option { - // 1. Check if service has defined directories - let defined_dirs = self.get_service_directories(service); - if !defined_dirs.is_empty() { - // Service has defined paths - use ONLY those - for dir in defined_dirs { + // 1. 
Check if service has configured directories (exact match only) + if let Some(dirs) = self.config.service_directories.get(service) { + // Service has configured paths - use the first accessible one + for dir in dirs { + if let Some(size) = self.get_directory_size(dir) { + return Some(size); + } + } - // If defined path failed, return None (shows as "-") - return None; + // If configured paths failed, return Some(0.0) (shows as 0) + return Some(0.0); } - // 2. No defined path - use systemctl WorkingDirectory + // 2. No configured path - use systemctl WorkingDirectory let output = Command::new("systemctl") .arg("show") .arg(format!("{}.service", service)) @@ -447,36 +386,6 @@ impl SystemdCollector { None } - /// Get defined service directories (highest priority) - fn get_service_directories(&self, service: &str) -> Vec<&str> { - match service { - // Game servers (ARK Survival Ascended) - HIGHEST PRIORITY - "ark-island" => vec!["/var/lib/ark-servers/island"], - "ark-scorched" => vec!["/var/lib/ark-servers/scorched"], - "ark-center" => vec!["/var/lib/ark-servers/center"], - "ark-aberration" => vec!["/var/lib/ark-servers/aberration"], - "ark-extinction" => vec!["/var/lib/ark-servers/extinction"], - "ark-ragnarok" => vec!["/var/lib/ark-servers/ragnarok"], - "ark-valguero" => vec!["/var/lib/ark-servers/valguero"], - - // Other services with defined paths - s if s.contains("docker") => vec!["/var/lib/docker", "/var/lib/docker/containers"], - s if s.contains("gitea") => vec!["/var/lib/gitea", "/opt/gitea", "/home/git", "/data/gitea"], - s if s.contains("nginx") => vec!["/var/log/nginx", "/var/www", "/usr/share/nginx"], - s if s.contains("immich") => vec!["/var/lib/immich", "/opt/immich", "/usr/src/app/upload"], - s if s.contains("postgres") => vec!["/var/lib/postgresql", "/var/lib/postgres"], - s if s.contains("mysql") => vec!["/var/lib/mysql"], - s if s.contains("redis") => vec!["/var/lib/redis", "/var/redis"], - s if s.contains("unifi") => vec!["/var/lib/unifi", "/opt/UniFi"], - s if
s.contains("vaultwarden") => vec!["/var/lib/vaultwarden", "/opt/vaultwarden"], - s if s.contains("mosquitto") => vec!["/var/lib/mosquitto", "/etc/mosquitto"], - s if s.contains("postfix") => vec!["/var/spool/postfix", "/var/lib/postfix"], - - // No defined path - will fall back to systemctl WorkingDirectory - _ => vec![], - } - } - diff --git a/agent/src/config/mod.rs b/agent/src/config/mod.rs index af22f8f..e9bd11f 100644 --- a/agent/src/config/mod.rs +++ b/agent/src/config/mod.rs @@ -82,8 +82,10 @@ pub struct FilesystemConfig { pub name: String, pub uuid: String, pub mount_point: String, - pub fs_type: String, + pub fs_type: String, // "ext4", "zfs", "xfs", "mergerfs", "btrfs" pub monitor: bool, + pub storage_type: String, // "single", "raid", "mirror", "mergerfs", "zfs" + pub underlying_devices: Vec, // ["sda", "sdb", "sdc"] or ["nvme0n1"] } @@ -96,6 +98,8 @@ pub struct SystemdConfig { pub excluded_services: Vec, pub memory_warning_mb: f32, pub memory_critical_mb: f32, + pub service_directories: std::collections::HashMap>, + pub host_user_mapping: String, } /// SMART collector configuration diff --git a/agent/src/metrics/mod.rs b/agent/src/metrics/mod.rs index 2fe3b33..8376cd5 100644 --- a/agent/src/metrics/mod.rs +++ b/agent/src/metrics/mod.rs @@ -51,7 +51,7 @@ impl MetricCollectionManager { } Some("systemd") => { // Systemd collector only - let systemd_collector = SystemdCollector::new(); + let systemd_collector = SystemdCollector::new(config.systemd.clone()); collectors.push(Box::new(systemd_collector)); info!("BENCHMARK: Systemd collector only"); } @@ -88,7 +88,7 @@ impl MetricCollectionManager { collectors.push(Box::new(disk_collector)); info!("Disk collector initialized"); - let systemd_collector = SystemdCollector::new(); + let systemd_collector = SystemdCollector::new(config.systemd.clone()); collectors.push(Box::new(systemd_collector)); info!("Systemd collector initialized");