cm-dashboard/agent/src/collectors/cpu.rs

use async_trait::async_trait;
use cm_dashboard_shared::{Metric, MetricValue, Status, registry};
use std::time::Duration;
use tracing::debug;

use super::{Collector, CollectorError, utils};
use crate::config::CpuConfig;

/// CPU metrics collector that gathers load averages, temperature and frequency
/// from procfs/sysfs text files.
///
/// EFFICIENCY NOTES:
/// - Single /proc/loadavg read for all three load-average metrics
/// - Temperature is read from thermal zones under /sys/class/thermal
/// - Frequency is read from cpufreq sysfs, falling back to /proc/cpuinfo
/// - Minimal string allocations
/// - Every reading goes through `utils::read_proc_file`; nothing in this file
///   spawns a process
/// - <0.1ms collection time is the stated design target
pub struct CpuCollector {
    /// Warning/critical thresholds used to grade load and temperature readings.
    config: CpuConfig,
    /// Collector identifier returned by `Collector::name` (set to "cpu" in `new`).
    name: String,
}

impl CpuCollector {
    /// Number of `/sys/class/thermal/thermal_zoneN` entries (N = 0..10) that are probed
    /// when looking for a temperature reading.
    const MAX_THERMAL_ZONES: u32 = 10;

    /// Create a CPU collector that grades readings with the thresholds in `config`.
    pub fn new(config: CpuConfig) -> Self {
        Self {
            config,
            name: "cpu".to_string(),
        }
    }

    /// Grade `value` against a warning/critical threshold pair.
    ///
    /// Returns `Status::Critical` when `value >= critical`, `Status::Warning` when
    /// `value >= warning`, and `Status::Ok` otherwise (including for NaN readings,
    /// because every comparison with NaN is false).
    fn threshold_status(value: f32, warning: f32, critical: f32) -> Status {
        if value >= critical {
            Status::Critical
        } else if value >= warning {
            Status::Warning
        } else {
            Status::Ok
        }
    }

    /// Calculate CPU load status using configured thresholds
    fn calculate_load_status(&self, load: f32) -> Status {
        Self::threshold_status(
            load,
            self.config.load_warning_threshold,
            self.config.load_critical_threshold,
        )
    }

    /// Calculate CPU temperature status using configured thresholds
    fn calculate_temperature_status(&self, temp: f32) -> Status {
        Self::threshold_status(
            temp,
            self.config.temperature_warning_threshold,
            self.config.temperature_critical_threshold,
        )
    }

    /// Build one load-average metric whose status is derived from its own value.
    fn load_metric(&self, name: String, load: f32, description: &str) -> Metric {
        Metric::new(
            name,
            MetricValue::Float(load),
            self.calculate_load_status(load),
        )
        .with_description(description.to_string())
    }

    /// Collect CPU load averages from /proc/loadavg
    /// Format: "0.52 0.58 0.59 1/257 12345"
    ///
    /// # Errors
    /// Returns the read error from `utils::read_proc_file`, a `CollectorError::Parse`
    /// when fewer than three whitespace-separated fields are present, or the error
    /// from `utils::parse_f32` when one of the first three fields is not a number.
    async fn collect_load_averages(&self) -> Result<Vec<Metric>, CollectorError> {
        let content = utils::read_proc_file("/proc/loadavg")?;
        // `split_whitespace` already ignores leading/trailing whitespace, so no
        // separate `trim` is needed.
        let parts: Vec<&str> = content.split_whitespace().collect();

        if parts.len() < 3 {
            return Err(CollectorError::Parse {
                value: content,
                error: "Expected at least 3 values in /proc/loadavg".to_string(),
            });
        }

        let load_1min = utils::parse_f32(parts[0])?;
        let load_5min = utils::parse_f32(parts[1])?;
        let load_15min = utils::parse_f32(parts[2])?;

        // Each average is graded independently against the same load thresholds.
        Ok(vec![
            self.load_metric(
                registry::CPU_LOAD_1MIN.to_string(),
                load_1min,
                "CPU load average over 1 minute",
            ),
            self.load_metric(
                registry::CPU_LOAD_5MIN.to_string(),
                load_5min,
                "CPU load average over 5 minutes",
            ),
            self.load_metric(
                registry::CPU_LOAD_15MIN.to_string(),
                load_15min,
                "CPU load average over 15 minutes",
            ),
        ])
    }

    /// Build the temperature metric from a raw thermal-zone reading.
    ///
    /// `millidegrees` is the integer read from a zone's `temp` file; it is divided
    /// by 1000 to obtain the value reported in °C.
    fn temperature_metric(&self, millidegrees: u64, description: String) -> Metric {
        let temp_celsius = millidegrees as f32 / 1000.0;

        Metric::new(
            registry::CPU_TEMPERATURE_CELSIUS.to_string(),
            MetricValue::Float(temp_celsius),
            self.calculate_temperature_status(temp_celsius),
        )
        .with_description(description)
        .with_unit("°C".to_string())
    }

    /// Collect CPU temperature from thermal zones
    /// Prioritizes x86_pkg_temp over generic thermal zones (legacy behavior)
    ///
    /// Returns `Ok(None)` when no probed zone yields a readable temperature; this
    /// method itself never returns `Err`.
    async fn collect_temperature(&self) -> Result<Option<Metric>, CollectorError> {
        // Pass 1: prefer the zone whose `type` file identifies it as the CPU package
        // sensor. Zone numbering is not fixed, so the type has to be checked rather
        // than assuming thermal_zone0 is the package sensor.
        for zone_id in 0..Self::MAX_THERMAL_ZONES {
            let type_path = format!("/sys/class/thermal/thermal_zone{}/type", zone_id);
            let is_package_zone = utils::read_proc_file(type_path.as_str())
                .map(|zone_type| zone_type.trim() == "x86_pkg_temp")
                .unwrap_or(false);
            if !is_package_zone {
                continue;
            }

            let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", zone_id);
            if let Ok(temp) = self.read_thermal_zone(&temp_path).await {
                return Ok(Some(
                    self.temperature_metric(temp, "CPU package temperature".to_string()),
                ));
            }
        }

        // Pass 2 (fallback): first zone of any type that yields a readable value.
        for zone_id in 0..Self::MAX_THERMAL_ZONES {
            let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", zone_id);
            if let Ok(temp) = self.read_thermal_zone(&temp_path).await {
                return Ok(Some(self.temperature_metric(
                    temp,
                    format!("CPU temperature from thermal_zone{}", zone_id),
                )));
            }
        }

        debug!("No CPU temperature sensors found");
        Ok(None)
    }

    /// Read the raw integer value of a thermal zone `temp` file.
    ///
    /// # Errors
    /// Propagates the read error from `utils::read_proc_file` or the parse error
    /// from `utils::parse_u64`.
    async fn read_thermal_zone(&self, path: &str) -> Result<u64, CollectorError> {
        let content = utils::read_proc_file(path)?;
        utils::parse_u64(content.trim())
    }

    /// Build the frequency metric; frequency has no thresholds so status is always Ok.
    fn frequency_metric(freq_mhz: f32, description: &str) -> Metric {
        Metric::new(
            registry::CPU_FREQUENCY_MHZ.to_string(),
            MetricValue::Float(freq_mhz),
            Status::Ok,
        )
        .with_description(description.to_string())
        .with_unit("MHz".to_string())
    }

    /// Collect CPU frequency from cpufreq's `scaling_cur_freq`, falling back to the
    /// first "cpu MHz" line of /proc/cpuinfo.
    ///
    /// Returns `Ok(None)` when neither source yields a parseable value; this method
    /// itself never returns `Err`.
    async fn collect_frequency(&self) -> Result<Option<Metric>, CollectorError> {
        // Try scaling frequency first (more accurate for current frequency).
        // The value is converted from kHz to MHz.
        let scaling_khz =
            utils::read_proc_file("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq")
                .ok()
                .and_then(|raw| utils::parse_u64(raw.trim()).ok());
        if let Some(freq_khz) = scaling_khz {
            return Ok(Some(Self::frequency_metric(
                freq_khz as f32 / 1000.0,
                "Current CPU frequency",
            )));
        }

        // Fallback: parse /proc/cpuinfo; only the first CPU entry is considered.
        if let Ok(content) = utils::read_proc_file("/proc/cpuinfo") {
            let first_cpu_mhz = content
                .lines()
                .find(|line| line.starts_with("cpu MHz"))
                .and_then(|line| line.split(':').nth(1))
                // The text after ':' carries leading whitespace ("cpu MHz\t\t: 2400.000"),
                // so it must be trimmed before parsing, as every other parse call here does.
                .and_then(|raw| utils::parse_f32(raw.trim()).ok());

            if let Some(freq_mhz) = first_cpu_mhz {
                return Ok(Some(Self::frequency_metric(
                    freq_mhz,
                    "CPU base frequency from /proc/cpuinfo",
                )));
            }
        }

        debug!("CPU frequency not available");
        Ok(None)
    }
}

#[async_trait]
impl Collector for CpuCollector {
    /// Identifier of this collector.
    fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Gather every CPU metric in one pass: the three load averages, then the
    /// optional temperature and frequency readings.
    ///
    /// # Errors
    /// Propagates any error returned by the load-average, temperature or frequency
    /// collection steps; the first failure aborts the pass.
    async fn collect(&self) -> Result<Vec<Metric>, CollectorError> {
        debug!("Collecting CPU metrics");
        let started_at = std::time::Instant::now();

        // Three load averages plus at most one temperature and one frequency metric.
        let mut collected: Vec<Metric> = Vec::with_capacity(5);

        // Load averages come first.
        let load_metrics = self.collect_load_averages().await?;
        collected.extend(load_metrics);

        // Temperature may be absent; extending with `None` adds nothing.
        let temperature = self.collect_temperature().await?;
        collected.extend(temperature);

        // Frequency may be absent as well.
        let frequency = self.collect_frequency().await?;
        collected.extend(frequency);

        let elapsed = started_at.elapsed();
        debug!("CPU collection completed in {:?} with {} metrics", elapsed, collected.len());

        // Efficiency check: emit an extra debug line when the pass took more than 1ms.
        let elapsed_ms = elapsed.as_millis();
        if elapsed_ms > 1 {
            debug!("CPU collection took {}ms - consider optimization", elapsed_ms);
        }

        // No performance data is stored here; tracking is handled by the cache system.
        Ok(collected)
    }

    /// This collector keeps no performance data of its own, so there is nothing to
    /// report; performance tracking is handled by the cache system.
    fn get_performance_metrics(&self) -> Option<super::PerformanceMetrics> {
        None
    }
}