diff --git a/CLAUDE.md b/CLAUDE.md index b289cf1..6b74235 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -311,8 +311,12 @@ Agent → ["cpu_load_1min", "memory_usage_percent", ...] → Dashboard → Widge - CLI: `cm-dashboard-agent [-v]` (intelligent caching enabled) **Service Discovery:** -- Scans running systemd services -- Filters by predefined interesting patterns (gitea, nginx, docker, etc.) +- Scans ALL systemd services (active, inactive, failed, dead, etc.) using list-unit-files and list-units --all +- Discovers both system services and user services per host: + - steambox/cmbox: reads system + cm user services + - simonbox: reads system + simon user services +- Filters by service_name_filters patterns (gitea, nginx, docker, sunshine, etc.) +- Excludes maintenance services (docker-prune, sshd@, ark-permissions, etc.) - No host-specific hardcoded service lists ### Current Implementation Status diff --git a/agent/src/agent.rs b/agent/src/agent.rs index 3f41639..24378b4 100644 --- a/agent/src/agent.rs +++ b/agent/src/agent.rs @@ -23,12 +23,9 @@ impl Agent { let hostname = gethostname().to_string_lossy().to_string(); info!("Initializing agent for host: {}", hostname); - // Load configuration - let config = if let Some(path) = config_path { - AgentConfig::load_from_file(&path)? - } else { - AgentConfig::default() - }; + // Load configuration (now required) + let config_path = config_path.ok_or_else(|| anyhow::anyhow!("Configuration file path is required"))?; + let config = AgentConfig::from_file(&config_path)?; info!("Agent configuration loaded"); diff --git a/agent/src/collectors/systemd.rs b/agent/src/collectors/systemd.rs index 73c9ef9..3cbaab6 100644 --- a/agent/src/collectors/systemd.rs +++ b/agent/src/collectors/systemd.rs @@ -128,8 +128,19 @@ impl SystemdCollector { .arg("--plain") .output()?; - // Also get user unit files (user-level services) - let user_unit_files_output = Command::new("systemctl") + // Get hostname to determine which user to check + let hostname = gethostname::gethostname().to_string_lossy().to_string(); + let target_user = match hostname.as_str() { + "steambox" | "cmbox" => "cm", + "simonbox" => "simon", + _ => "cm", // default to cm for unknown hosts + }; + + // Also get user unit files (user-level services) for target user + let user_unit_files_output = Command::new("sudo") + .arg("-u") + .arg(target_user) + .arg("systemctl") .arg("--user") .arg("list-unit-files") .arg("--type=service") @@ -137,8 +148,11 @@ impl SystemdCollector { .arg("--plain") .output()?; - // And user loaded units - let user_units_output = Command::new("systemctl") + // And user loaded units for target user + let user_units_output = Command::new("sudo") + .arg("-u") + .arg(target_user) + .arg("systemctl") .arg("--user") .arg("list-units") .arg("--type=service") diff --git a/agent/src/config/defaults.rs b/agent/src/config/defaults.rs index c0eab29..60c4ac2 100644 --- a/agent/src/config/defaults.rs +++ b/agent/src/config/defaults.rs @@ -1,54 +1,2 @@ -// Collection intervals -pub const DEFAULT_COLLECTION_INTERVAL_SECONDS: u64 = 2; -pub const DEFAULT_CPU_INTERVAL_SECONDS: u64 = 5; -pub const DEFAULT_MEMORY_INTERVAL_SECONDS: u64 = 5; -pub const DEFAULT_DISK_INTERVAL_SECONDS: u64 = 300; // 5 minutes -pub const DEFAULT_PROCESS_INTERVAL_SECONDS: u64 = 30; -pub const DEFAULT_SYSTEMD_INTERVAL_SECONDS: u64 = 30; -pub const DEFAULT_SMART_INTERVAL_SECONDS: u64 = 900; // 15 minutes -pub const DEFAULT_BACKUP_INTERVAL_SECONDS: u64 = 900; // 15 minutes -pub const DEFAULT_NETWORK_INTERVAL_SECONDS: u64 = 30; - -// ZMQ configuration -pub const DEFAULT_ZMQ_PUBLISHER_PORT: u16 = 6130; -pub const DEFAULT_ZMQ_COMMAND_PORT: u16 = 6131; -pub const DEFAULT_ZMQ_BIND_ADDRESS: &str = "0.0.0.0"; -pub const DEFAULT_ZMQ_TIMEOUT_MS: u64 = 5000; -pub const DEFAULT_ZMQ_HEARTBEAT_INTERVAL_MS: u64 = 30000; - -// CPU thresholds (production values from legacy) -pub const DEFAULT_CPU_LOAD_WARNING: f32 = 9.0; -pub const DEFAULT_CPU_LOAD_CRITICAL: f32 = 10.0; -pub const DEFAULT_CPU_TEMP_WARNING: f32 = 100.0; // Effectively disabled -pub const DEFAULT_CPU_TEMP_CRITICAL: f32 = 100.0; // Effectively disabled - -// Memory thresholds (from legacy) -pub const DEFAULT_MEMORY_WARNING_PERCENT: f32 = 80.0; -pub const DEFAULT_MEMORY_CRITICAL_PERCENT: f32 = 95.0; - -// Disk thresholds -pub const DEFAULT_DISK_WARNING_PERCENT: f32 = 80.0; -pub const DEFAULT_DISK_CRITICAL_PERCENT: f32 = 90.0; - -// Process configuration -pub const DEFAULT_TOP_PROCESSES_COUNT: usize = 10; - -// Service thresholds -pub const DEFAULT_SERVICE_MEMORY_WARNING_MB: f32 = 1000.0; -pub const DEFAULT_SERVICE_MEMORY_CRITICAL_MB: f32 = 2000.0; - -// SMART thresholds -pub const DEFAULT_SMART_TEMP_WARNING: f32 = 60.0; -pub const DEFAULT_SMART_TEMP_CRITICAL: f32 = 70.0; -pub const DEFAULT_SMART_WEAR_WARNING: f32 = 80.0; -pub const DEFAULT_SMART_WEAR_CRITICAL: f32 = 90.0; - -// Backup configuration -pub const DEFAULT_BACKUP_MAX_AGE_HOURS: u64 = 48; - -// Notification configuration (from legacy) -pub const DEFAULT_SMTP_HOST: &str = "localhost"; -pub const DEFAULT_SMTP_PORT: u16 = 25; -pub const DEFAULT_FROM_EMAIL: &str = "{hostname}@cmtec.se"; -pub const DEFAULT_TO_EMAIL: &str = "cm@cmtec.se"; -pub const DEFAULT_NOTIFICATION_RATE_LIMIT_MINUTES: u64 = 0; +// This file is now empty - all configuration values come from config files +// No hardcoded defaults are used \ No newline at end of file diff --git a/agent/src/config/mod.rs b/agent/src/config/mod.rs index 381591f..17e8336 100644 --- a/agent/src/config/mod.rs +++ b/agent/src/config/mod.rs @@ -3,12 +3,9 @@ use cm_dashboard_shared::CacheConfig; use serde::{Deserialize, Serialize}; use std::path::Path; -pub mod defaults; pub mod loader; pub mod validation; -use defaults::*; - /// Main agent configuration #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AgentConfig { @@ -58,7 +55,9 @@ pub struct CpuConfig { pub struct MemoryConfig { pub enabled: bool, pub interval_seconds: u64, + /// Memory usage warning threshold (percentage) pub usage_warning_percent: f32, + /// Memory usage critical threshold (percentage) pub usage_critical_percent: f32, } @@ -67,19 +66,22 @@ pub struct MemoryConfig { pub struct DiskConfig { pub enabled: bool, pub interval_seconds: u64, + /// Disk usage warning threshold (percentage) pub usage_warning_percent: f32, + /// Disk usage critical threshold (percentage) pub usage_critical_percent: f32, + /// Filesystem configurations pub filesystems: Vec, } /// Filesystem configuration entry #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FilesystemConfig { - pub name: String, // Human-readable name (e.g., "root", "boot", "home") - pub uuid: String, // UUID for /dev/disk/by-uuid/ resolution - pub mount_point: String, // Expected mount point (e.g., "/", "/boot") - pub fs_type: String, // Filesystem type (e.g., "ext4", "vfat") - pub monitor: bool, // Whether to monitor this filesystem + pub name: String, + pub uuid: String, + pub mount_point: String, + pub fs_type: String, + pub monitor: bool, } /// Process collector configuration @@ -95,8 +97,8 @@ pub struct ProcessConfig { pub struct SystemdConfig { pub enabled: bool, pub interval_seconds: u64, - pub auto_discover: bool, - pub services: Vec, + pub service_name_filters: Vec, + pub excluded_services: Vec, pub memory_warning_mb: f32, pub memory_critical_mb: f32, } @@ -126,8 +128,6 @@ pub struct BackupConfig { pub struct NetworkConfig { pub enabled: bool, pub interval_seconds: u64, - pub interfaces: Vec, - pub auto_discover: bool, } /// Notification configuration @@ -142,275 +142,11 @@ pub struct NotificationConfig { } impl AgentConfig { - pub fn load_from_file>(path: P) -> Result { + pub fn from_file>(path: P) -> Result { loader::load_config(path) } pub fn validate(&self) -> Result<()> { validation::validate_config(self) } -} - -impl Default for AgentConfig { - fn default() -> Self { - Self { - zmq: ZmqConfig::default(), - collectors: CollectorConfig::default(), - cache: CacheConfig::default(), - notifications: NotificationConfig::default(), - collection_interval_seconds: DEFAULT_COLLECTION_INTERVAL_SECONDS, - } - } -} - -impl Default for ZmqConfig { - fn default() -> Self { - Self { - publisher_port: DEFAULT_ZMQ_PUBLISHER_PORT, - command_port: DEFAULT_ZMQ_COMMAND_PORT, - bind_address: DEFAULT_ZMQ_BIND_ADDRESS.to_string(), - timeout_ms: DEFAULT_ZMQ_TIMEOUT_MS, - heartbeat_interval_ms: DEFAULT_ZMQ_HEARTBEAT_INTERVAL_MS, - } - } -} - -impl Default for CollectorConfig { - fn default() -> Self { - Self { - cpu: CpuConfig::default(), - memory: MemoryConfig::default(), - disk: DiskConfig::default(), - processes: ProcessConfig::default(), - systemd: SystemdConfig::default(), - smart: SmartConfig::default(), - backup: BackupConfig::default(), - network: NetworkConfig::default(), - } - } -} - -impl Default for CpuConfig { - fn default() -> Self { - Self { - enabled: true, - interval_seconds: DEFAULT_CPU_INTERVAL_SECONDS, - load_warning_threshold: DEFAULT_CPU_LOAD_WARNING, - load_critical_threshold: DEFAULT_CPU_LOAD_CRITICAL, - temperature_warning_threshold: DEFAULT_CPU_TEMP_WARNING, - temperature_critical_threshold: DEFAULT_CPU_TEMP_CRITICAL, - } - } -} - -impl Default for MemoryConfig { - fn default() -> Self { - Self { - enabled: true, - interval_seconds: DEFAULT_MEMORY_INTERVAL_SECONDS, - usage_warning_percent: DEFAULT_MEMORY_WARNING_PERCENT, - usage_critical_percent: DEFAULT_MEMORY_CRITICAL_PERCENT, - } - } -} - -impl Default for DiskConfig { - fn default() -> Self { - let hostname = gethostname::gethostname().to_string_lossy().to_string(); - let filesystems = get_default_filesystems_for_host(&hostname); - - Self { - enabled: true, - interval_seconds: DEFAULT_DISK_INTERVAL_SECONDS, - usage_warning_percent: DEFAULT_DISK_WARNING_PERCENT, - usage_critical_percent: DEFAULT_DISK_CRITICAL_PERCENT, - filesystems, - } - } -} - -/// Get default filesystem configurations for known CMTEC hosts -fn get_default_filesystems_for_host(hostname: &str) -> Vec { - match hostname { - "cmbox" => vec![ - FilesystemConfig { - name: "root".to_string(), - uuid: "4cade5ce-85a5-4a03-83c8-dfd1d3888d79".to_string(), - mount_point: "/".to_string(), - fs_type: "ext4".to_string(), - monitor: true, - }, - FilesystemConfig { - name: "boot".to_string(), - uuid: "AB4D-62EC".to_string(), - mount_point: "/boot".to_string(), - fs_type: "vfat".to_string(), - monitor: true, - }, - ], - "srv02" => vec![ - FilesystemConfig { - name: "root".to_string(), - uuid: "5a880608-c79f-458f-a031-30206aa27ca7".to_string(), - mount_point: "/".to_string(), - fs_type: "ext4".to_string(), - monitor: true, - }, - FilesystemConfig { - name: "boot".to_string(), - uuid: "6B2E-2AD9".to_string(), - mount_point: "/boot".to_string(), - fs_type: "vfat".to_string(), - monitor: true, - }, - ], - "simonbox" => vec![ - FilesystemConfig { - name: "root".to_string(), - uuid: "b74284a9-2899-4f71-bdb0-fd07dc4baab3".to_string(), - mount_point: "/".to_string(), - fs_type: "ext4".to_string(), - monitor: true, - }, - FilesystemConfig { - name: "boot".to_string(), - uuid: "F6A3-AD2B".to_string(), - mount_point: "/boot".to_string(), - fs_type: "vfat".to_string(), - monitor: true, - }, - FilesystemConfig { - name: "steampool_1".to_string(), - uuid: "09300cb7-0938-4dba-8a42-7a7aaf60db51".to_string(), - mount_point: "/steampool_1".to_string(), - fs_type: "ext4".to_string(), - monitor: true, - }, - FilesystemConfig { - name: "steampool_2".to_string(), - uuid: "a2d61a41-3f2a-4760-b62e-5eb8caf50d1a".to_string(), - mount_point: "/steampool_2".to_string(), - fs_type: "ext4".to_string(), - monitor: true, - }, - ], - "steambox" => vec![ - FilesystemConfig { - name: "root".to_string(), - uuid: "4514ca9f-2d0a-40df-b14b-e342f39c3e6a".to_string(), - mount_point: "/".to_string(), - fs_type: "ext4".to_string(), - monitor: true, - }, - FilesystemConfig { - name: "boot".to_string(), - uuid: "8FD2-1B13".to_string(), - mount_point: "/boot".to_string(), - fs_type: "vfat".to_string(), - monitor: true, - }, - FilesystemConfig { - name: "steampool".to_string(), - uuid: "0ebe8abb-bbe7-4224-947b-86bf38981f60".to_string(), - mount_point: "/mnt/steampool".to_string(), - fs_type: "ext4".to_string(), - monitor: true, - }, - ], - "srv01" => vec![ - FilesystemConfig { - name: "root".to_string(), - uuid: "cd98df34-03a3-4d68-8338-d90d2920f9f8".to_string(), - mount_point: "/".to_string(), - fs_type: "ext4".to_string(), - monitor: true, - }, - FilesystemConfig { - name: "boot".to_string(), - uuid: "13E1-4DDE".to_string(), - mount_point: "/boot".to_string(), - fs_type: "vfat".to_string(), - monitor: true, - }, - ], - // labbox and wslbox have no UUIDs configured yet - "labbox" | "wslbox" => { - Vec::new() - }, - _ => { - // Unknown hosts use auto-discovery - Vec::new() - } - } -} - -impl Default for ProcessConfig { - fn default() -> Self { - Self { - enabled: true, - interval_seconds: DEFAULT_PROCESS_INTERVAL_SECONDS, - top_processes_count: DEFAULT_TOP_PROCESSES_COUNT, - } - } -} - -impl Default for SystemdConfig { - fn default() -> Self { - Self { - enabled: true, - interval_seconds: DEFAULT_SYSTEMD_INTERVAL_SECONDS, - auto_discover: true, - services: Vec::new(), - memory_warning_mb: DEFAULT_SERVICE_MEMORY_WARNING_MB, - memory_critical_mb: DEFAULT_SERVICE_MEMORY_CRITICAL_MB, - } - } -} - -impl Default for SmartConfig { - fn default() -> Self { - Self { - enabled: true, - interval_seconds: DEFAULT_SMART_INTERVAL_SECONDS, - temperature_warning_celsius: DEFAULT_SMART_TEMP_WARNING, - temperature_critical_celsius: DEFAULT_SMART_TEMP_CRITICAL, - wear_warning_percent: DEFAULT_SMART_WEAR_WARNING, - wear_critical_percent: DEFAULT_SMART_WEAR_CRITICAL, - } - } -} - -impl Default for BackupConfig { - fn default() -> Self { - Self { - enabled: true, - interval_seconds: DEFAULT_BACKUP_INTERVAL_SECONDS, - backup_paths: Vec::new(), - max_age_hours: DEFAULT_BACKUP_MAX_AGE_HOURS, - } - } -} - -impl Default for NetworkConfig { - fn default() -> Self { - Self { - enabled: true, - interval_seconds: DEFAULT_NETWORK_INTERVAL_SECONDS, - interfaces: Vec::new(), - auto_discover: true, - } - } -} - -impl Default for NotificationConfig { - fn default() -> Self { - Self { - enabled: true, - smtp_host: DEFAULT_SMTP_HOST.to_string(), - smtp_port: DEFAULT_SMTP_PORT, - from_email: DEFAULT_FROM_EMAIL.to_string(), - to_email: DEFAULT_TO_EMAIL.to_string(), - rate_limit_minutes: DEFAULT_NOTIFICATION_RATE_LIMIT_MINUTES, - } - } -} +} \ No newline at end of file diff --git a/agent/src/main.rs b/agent/src/main.rs index b78e845..4445b78 100644 --- a/agent/src/main.rs +++ b/agent/src/main.rs @@ -23,9 +23,9 @@ struct Cli { #[arg(short, long, action = clap::ArgAction::Count)] verbose: u8, - /// Configuration file path + /// Configuration file path (required) #[arg(short, long)] - config: Option, + config: String, } #[tokio::main] @@ -46,7 +46,7 @@ async fn main() -> Result<()> { info!("CM Dashboard Agent starting with individual metrics architecture..."); // Create and run agent - let mut agent = Agent::new(cli.config).await?; + let mut agent = Agent::new(Some(cli.config)).await?; // Setup graceful shutdown channel let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel();