use async_trait::async_trait; use cm_dashboard_shared::{AgentData, Status, HysteresisThresholds}; use tracing::debug; use super::{utils, Collector, CollectorError}; use crate::config::CpuConfig; /// Extremely efficient CPU metrics collector /// /// EFFICIENCY OPTIMIZATIONS: /// - Single /proc/loadavg read for all load metrics /// - Single /proc/stat read for CPU usage /// - Minimal string allocations /// - No process spawning /// - <0.1ms collection time target pub struct CpuCollector { load_thresholds: HysteresisThresholds, temperature_thresholds: HysteresisThresholds, } impl CpuCollector { pub fn new(config: CpuConfig) -> Self { // Create hysteresis thresholds with 10% gap for recovery let load_thresholds = HysteresisThresholds::new( config.load_warning_threshold, config.load_critical_threshold, ); let temperature_thresholds = HysteresisThresholds::new( config.temperature_warning_threshold, config.temperature_critical_threshold, ); Self { load_thresholds, temperature_thresholds, } } /// Calculate CPU load status using thresholds fn calculate_load_status(&self, load: f32) -> Status { if load >= self.load_thresholds.critical_high { Status::Critical } else if load >= self.load_thresholds.warning_high { Status::Warning } else { Status::Ok } } /// Calculate CPU temperature status using thresholds fn calculate_temperature_status(&self, temp: f32) -> Status { if temp >= self.temperature_thresholds.critical_high { Status::Critical } else if temp >= self.temperature_thresholds.warning_high { Status::Warning } else { Status::Ok } } /// Collect CPU load averages and populate AgentData /// Format: "0.52 0.58 0.59 1/257 12345" async fn collect_load_averages(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> { let content = utils::read_proc_file("/proc/loadavg")?; let parts: Vec<&str> = content.trim().split_whitespace().collect(); if parts.len() < 3 { return Err(CollectorError::Parse { value: content, error: "Expected at least 3 values in /proc/loadavg".to_string(), }); } let load_1min = utils::parse_f32(parts[0])?; let load_5min = utils::parse_f32(parts[1])?; let load_15min = utils::parse_f32(parts[2])?; // Populate CPU data directly agent_data.system.cpu.load_1min = load_1min; agent_data.system.cpu.load_5min = load_5min; agent_data.system.cpu.load_15min = load_15min; Ok(()) } /// Collect CPU temperature and populate AgentData /// Prioritizes x86_pkg_temp over generic thermal zones async fn collect_temperature(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> { // Try x86_pkg_temp first (Intel CPU package temperature) if let Ok(temp) = self .read_thermal_zone("/sys/class/thermal/thermal_zone0/temp") .await { let temp_celsius = temp as f32 / 1000.0; agent_data.system.cpu.temperature_celsius = Some(temp_celsius); return Ok(()); } // Fallback: try other thermal zones for zone_id in 0..10 { let path = format!("/sys/class/thermal/thermal_zone{}/temp", zone_id); if let Ok(temp) = self.read_thermal_zone(&path).await { let temp_celsius = temp as f32 / 1000.0; agent_data.system.cpu.temperature_celsius = Some(temp_celsius); return Ok(()); } } debug!("No CPU temperature sensors found"); // Leave temperature as None if not available Ok(()) } /// Read temperature from thermal zone efficiently async fn read_thermal_zone(&self, path: &str) -> Result { let content = utils::read_proc_file(path)?; utils::parse_u64(content.trim()) } /// Collect CPU C-state (idle depth) and populate AgentData with top 3 C-states by usage async fn collect_cstate(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> { // Read C-state usage from first CPU (representative of overall system) // C-states indicate CPU idle depth: C1=light sleep, C6=deep sleep, C10=deepest let mut cstate_times: Vec<(String, u64)> = Vec::new(); let mut total_time: u64 = 0; // Collect all C-state times from CPU0 for state_num in 0..=10 { let time_path = format!("/sys/devices/system/cpu/cpu0/cpuidle/state{}/time", state_num); let name_path = format!("/sys/devices/system/cpu/cpu0/cpuidle/state{}/name", state_num); if let Ok(time_str) = utils::read_proc_file(&time_path) { if let Ok(time) = utils::parse_u64(time_str.trim()) { if let Ok(name) = utils::read_proc_file(&name_path) { let state_name = name.trim(); // Skip POLL state (not real idle) if state_name != "POLL" && time > 0 { // Extract "C" + digits pattern (C3, C10, etc.) to reduce JSON size // Handles formats like "C3_ACPI", "C10_MWAIT", etc. let clean_name = if let Some(c_pos) = state_name.find('C') { let rest = &state_name[c_pos + 1..]; let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count(); if digit_count > 0 { state_name[c_pos..c_pos + 1 + digit_count].to_string() } else { state_name.to_string() } } else { state_name.to_string() }; cstate_times.push((clean_name, time)); total_time += time; } } } } else { // No more states available break; } } // Sort by time descending to get top 3 cstate_times.sort_by(|a, b| b.1.cmp(&a.1)); // Calculate percentages for top 3 and populate AgentData agent_data.system.cpu.cstates = cstate_times .iter() .take(3) .map(|(name, time)| { let percent = if total_time > 0 { (*time as f32 / total_time as f32) * 100.0 } else { 0.0 }; cm_dashboard_shared::CStateInfo { name: name.clone(), percent, } }) .collect(); Ok(()) } } #[async_trait] impl Collector for CpuCollector { async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> { debug!("Collecting CPU metrics"); let start = std::time::Instant::now(); // Collect load averages (always available) self.collect_load_averages(agent_data).await?; // Collect temperature (optional) self.collect_temperature(agent_data).await?; // Collect C-state (CPU idle depth) self.collect_cstate(agent_data).await?; let duration = start.elapsed(); debug!("CPU collection completed in {:?}", duration); // Efficiency check: warn if collection takes too long if duration.as_millis() > 1 { debug!( "CPU collection took {}ms - consider optimization", duration.as_millis() ); } // Calculate status using thresholds agent_data.system.cpu.load_status = self.calculate_load_status(agent_data.system.cpu.load_1min); agent_data.system.cpu.temperature_status = if let Some(temp) = agent_data.system.cpu.temperature_celsius { self.calculate_temperature_status(temp) } else { Status::Unknown }; Ok(()) } }