use anyhow::Result; use async_trait::async_trait; use cm_dashboard_shared::{AgentData, ServiceData, Status}; use std::process::Command; use std::sync::RwLock; use std::time::Instant; use tracing::debug; use super::{Collector, CollectorError}; use crate::config::SystemdConfig; /// Systemd collector for monitoring systemd services with structured data output pub struct SystemdCollector { /// Cached state with thread-safe interior mutability state: RwLock, /// Configuration for service monitoring config: SystemdConfig, } /// Internal state for service caching #[derive(Debug, Clone)] struct ServiceCacheState { /// Last collection time for performance tracking last_collection: Option, /// Cached service data services: Vec, /// Interesting services to monitor (cached after discovery) monitored_services: Vec, /// Cached service status information from discovery service_status_cache: std::collections::HashMap, /// Last time services were discovered last_discovery_time: Option, /// How often to rediscover services (from config) discovery_interval_seconds: u64, } /// Cached service status information from systemctl list-units #[derive(Debug, Clone)] struct ServiceStatusInfo { load_state: String, active_state: String, sub_state: String, } /// Internal service information #[derive(Debug, Clone)] struct ServiceInfo { name: String, status: String, // "active", "inactive", "failed", etc. memory_mb: f32, // Memory usage in MB disk_gb: f32, // Disk usage in GB (usually 0 for services) } impl SystemdCollector { pub fn new(config: SystemdConfig) -> Self { let state = ServiceCacheState { last_collection: None, services: Vec::new(), monitored_services: Vec::new(), service_status_cache: std::collections::HashMap::new(), last_discovery_time: None, discovery_interval_seconds: config.interval_seconds, }; Self { state: RwLock::new(state), config, } } /// Collect service data and populate AgentData async fn collect_service_data(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> { let start_time = Instant::now(); debug!("Collecting systemd services metrics"); // Get cached services (discovery only happens when needed) let monitored_services = match self.get_monitored_services() { Ok(services) => services, Err(e) => { debug!("Failed to get monitored services: {}", e); return Ok(()); } }; // Collect service data for each monitored service let mut services = Vec::new(); for service_name in &monitored_services { match self.get_service_status(service_name) { Ok((active_status, _detailed_info)) => { let memory_mb = self.get_service_memory_usage(service_name).await.unwrap_or(0.0); let disk_gb = self.get_service_disk_usage(service_name).await.unwrap_or(0.0); let service_info = ServiceInfo { name: service_name.clone(), status: active_status, memory_mb, disk_gb, }; services.push(service_info); } Err(e) => { debug!("Failed to get status for service {}: {}", service_name, e); } } } // Update cached state { let mut state = self.state.write().unwrap(); state.last_collection = Some(start_time); state.services = services.clone(); } // Populate AgentData with service information for service in services { agent_data.services.push(ServiceData { name: service.name.clone(), status: service.status.clone(), memory_mb: service.memory_mb, disk_gb: service.disk_gb, user_stopped: false, // TODO: Integrate with service tracker service_status: self.calculate_service_status(&service.name, &service.status), }); } let elapsed = start_time.elapsed(); debug!("Systemd collection completed in {:?} with {} services", elapsed, agent_data.services.len()); Ok(()) } /// Get systemd services information async fn get_systemd_services(&self) -> Result, CollectorError> { let mut services = Vec::new(); // Get ALL service unit files (includes inactive services) let unit_files_output = Command::new("systemctl") .args(&["list-unit-files", "--type=service", "--no-pager", "--plain"]) .output() .map_err(|e| CollectorError::SystemRead { path: "systemctl list-unit-files".to_string(), error: e.to_string(), })?; // Get runtime status of ALL units (including inactive) let status_output = Command::new("systemctl") .args(&["list-units", "--type=service", "--all", "--no-pager", "--plain"]) .output() .map_err(|e| CollectorError::SystemRead { path: "systemctl list-units --all".to_string(), error: e.to_string(), })?; let unit_files_str = String::from_utf8_lossy(&unit_files_output.stdout); let status_str = String::from_utf8_lossy(&status_output.stdout); // Parse all service unit files to get complete service list let mut all_service_names = std::collections::HashSet::new(); for line in unit_files_str.lines() { let fields: Vec<&str> = line.split_whitespace().collect(); if fields.len() >= 2 && fields[0].ends_with(".service") { let service_name = fields[0].trim_end_matches(".service"); all_service_names.insert(service_name.to_string()); } } // Parse runtime status for all units let mut status_cache = std::collections::HashMap::new(); for line in status_str.lines() { let fields: Vec<&str> = line.split_whitespace().collect(); if fields.len() >= 4 && fields[0].ends_with(".service") { let service_name = fields[0].trim_end_matches(".service"); let load_state = fields.get(1).unwrap_or(&"unknown").to_string(); let active_state = fields.get(2).unwrap_or(&"unknown").to_string(); let sub_state = fields.get(3).unwrap_or(&"unknown").to_string(); status_cache.insert(service_name.to_string(), (load_state, active_state, sub_state)); } } // For services found in unit files but not in runtime status, set default inactive status for service_name in &all_service_names { if !status_cache.contains_key(service_name) { status_cache.insert(service_name.to_string(), ( "not-loaded".to_string(), "inactive".to_string(), "dead".to_string() )); } } // Process all discovered services and apply filters for service_name in &all_service_names { if self.should_monitor_service(service_name) { if let Some((load_state, active_state, sub_state)) = status_cache.get(service_name) { let memory_mb = self.get_service_memory_usage(service_name).await.unwrap_or(0.0); let disk_gb = self.get_service_disk_usage(service_name).await.unwrap_or(0.0); let normalized_status = self.normalize_service_status(active_state, sub_state); let service_info = ServiceInfo { name: service_name.to_string(), status: normalized_status, memory_mb, disk_gb, }; services.push(service_info); } } } Ok(services) } /// Check if a service should be monitored based on configuration filters with wildcard support fn should_monitor_service(&self, service_name: &str) -> bool { // If no filters configured, monitor nothing (to prevent noise) if self.config.service_name_filters.is_empty() { return false; } // Check if service matches any of the configured patterns for pattern in &self.config.service_name_filters { if self.matches_pattern(service_name, pattern) { return true; } } false } /// Check if service name matches pattern (supports wildcards like nginx*) fn matches_pattern(&self, service_name: &str, pattern: &str) -> bool { if pattern.ends_with('*') { let prefix = &pattern[..pattern.len() - 1]; service_name.starts_with(prefix) } else { service_name == pattern } } /// Get disk usage for a specific service async fn get_service_disk_usage(&self, service_name: &str) -> Result { // Check if this service has configured directory paths if let Some(dirs) = self.config.service_directories.get(service_name) { // Service has configured paths - use the first accessible one for dir in dirs { if let Some(size) = self.get_directory_size(dir) { return Ok(size); } } // If configured paths failed, return 0 return Ok(0.0); } // No configured path - try to get WorkingDirectory from systemctl let output = Command::new("systemctl") .args(&["show", &format!("{}.service", service_name), "--property=WorkingDirectory"]) .output() .map_err(|e| CollectorError::SystemRead { path: format!("WorkingDirectory for {}", service_name), error: e.to_string(), })?; let output_str = String::from_utf8_lossy(&output.stdout); for line in output_str.lines() { if line.starts_with("WorkingDirectory=") && !line.contains("[not set]") { let dir = line.strip_prefix("WorkingDirectory=").unwrap_or(""); if !dir.is_empty() { return Ok(self.get_directory_size(dir).unwrap_or(0.0)); } } } Ok(0.0) } /// Get size of a directory in GB fn get_directory_size(&self, path: &str) -> Option { let output = Command::new("du") .args(&["-sb", path]) .output() .ok()?; if !output.status.success() { return None; } let output_str = String::from_utf8_lossy(&output.stdout); let parts: Vec<&str> = output_str.split_whitespace().collect(); if let Some(size_str) = parts.first() { if let Ok(size_bytes) = size_str.parse::() { return Some(size_bytes as f32 / (1024.0 * 1024.0 * 1024.0)); } } None } /// Calculate service status, taking user-stopped services into account fn calculate_service_status(&self, service_name: &str, active_status: &str) -> Status { match active_status.to_lowercase().as_str() { "active" => Status::Ok, "inactive" | "dead" => { debug!("Service '{}' is inactive - treating as Inactive status", service_name); Status::Inactive }, "failed" | "error" => Status::Critical, "activating" | "deactivating" | "reloading" | "starting" | "stopping" => { debug!("Service '{}' is transitioning - treating as Pending", service_name); Status::Pending }, _ => Status::Unknown, } } /// Get memory usage for a specific service async fn get_service_memory_usage(&self, service_name: &str) -> Result { let output = Command::new("systemctl") .args(&["show", &format!("{}.service", service_name), "--property=MemoryCurrent"]) .output() .map_err(|e| CollectorError::SystemRead { path: format!("memory usage for {}", service_name), error: e.to_string(), })?; let output_str = String::from_utf8_lossy(&output.stdout); for line in output_str.lines() { if line.starts_with("MemoryCurrent=") { if let Some(mem_str) = line.strip_prefix("MemoryCurrent=") { if mem_str != "[not set]" { if let Ok(memory_bytes) = mem_str.parse::() { return Ok(memory_bytes as f32 / (1024.0 * 1024.0)); // Convert to MB } } } } } Ok(0.0) } /// Normalize service status to standard values fn normalize_service_status(&self, active_state: &str, sub_state: &str) -> String { match (active_state, sub_state) { ("active", "running") => "active".to_string(), ("active", _) => "active".to_string(), ("inactive", "dead") => "inactive".to_string(), ("inactive", _) => "inactive".to_string(), ("failed", _) => "failed".to_string(), ("activating", _) => "starting".to_string(), ("deactivating", _) => "stopping".to_string(), _ => format!("{}:{}", active_state, sub_state), } } /// Check if service collection cache should be updated fn should_update_cache(&self) -> bool { let state = self.state.read().unwrap(); match state.last_collection { None => true, Some(last) => { let cache_duration = std::time::Duration::from_secs(30); last.elapsed() > cache_duration } } } /// Get cached service data if available and fresh fn get_cached_services(&self) -> Option> { if !self.should_update_cache() { let state = self.state.read().unwrap(); Some(state.services.clone()) } else { None } } } #[async_trait] impl Collector for SystemdCollector { async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> { // Use cached data if available and fresh if let Some(cached_services) = self.get_cached_services() { debug!("Using cached systemd services data"); for service in cached_services { agent_data.services.push(ServiceData { name: service.name.clone(), status: service.status.clone(), memory_mb: service.memory_mb, disk_gb: service.disk_gb, user_stopped: false, // TODO: Integrate with service tracker service_status: self.calculate_service_status(&service.name, &service.status), }); } Ok(()) } else { // Collect fresh data self.collect_service_data(agent_data).await } } }