- Remove unused fields from CommandStatus variants
- Clean up unused methods and unused collector fields
- Fix lifetime syntax warning in SystemWidget
- Delete unused cache module completely
- Remove redundant render methods from widgets

All agent and dashboard warnings eliminated while preserving panel switching and scrolling functionality.
240 lines
9.0 KiB
Rust
240 lines
9.0 KiB
Rust
use async_trait::async_trait;
|
|
use cm_dashboard_shared::{registry, Metric, MetricValue, Status, StatusTracker, HysteresisThresholds};
|
|
|
|
use tracing::debug;
|
|
|
|
use super::{utils, Collector, CollectorError};
|
|
use crate::config::CpuConfig;
|
|
|
|
/// Extremely efficient CPU metrics collector
|
|
///
|
|
/// EFFICIENCY OPTIMIZATIONS:
|
|
/// - Single /proc/loadavg read for all load metrics
|
|
/// - Single /proc/stat read for CPU usage
|
|
/// - Minimal string allocations
|
|
/// - No process spawning
|
|
/// - <0.1ms collection time target
|
|
pub struct CpuCollector {
|
|
load_thresholds: HysteresisThresholds,
|
|
temperature_thresholds: HysteresisThresholds,
|
|
}
|
|
|
|
impl CpuCollector {
|
|
pub fn new(config: CpuConfig) -> Self {
|
|
// Create hysteresis thresholds with 10% gap for recovery
|
|
let load_thresholds = HysteresisThresholds::new(
|
|
config.load_warning_threshold,
|
|
config.load_critical_threshold,
|
|
);
|
|
|
|
let temperature_thresholds = HysteresisThresholds::new(
|
|
config.temperature_warning_threshold,
|
|
config.temperature_critical_threshold,
|
|
);
|
|
|
|
Self {
|
|
load_thresholds,
|
|
temperature_thresholds,
|
|
}
|
|
}
|
|
|
|
/// Calculate CPU load status using hysteresis thresholds
|
|
fn calculate_load_status(&self, metric_name: &str, load: f32, status_tracker: &mut StatusTracker) -> Status {
|
|
status_tracker.calculate_with_hysteresis(metric_name, load, &self.load_thresholds)
|
|
}
|
|
|
|
/// Calculate CPU temperature status using hysteresis thresholds
|
|
fn calculate_temperature_status(&self, metric_name: &str, temp: f32, status_tracker: &mut StatusTracker) -> Status {
|
|
status_tracker.calculate_with_hysteresis(metric_name, temp, &self.temperature_thresholds)
|
|
}
|
|
|
|
/// Collect CPU load averages from /proc/loadavg
|
|
/// Format: "0.52 0.58 0.59 1/257 12345"
|
|
async fn collect_load_averages(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
|
|
let content = utils::read_proc_file("/proc/loadavg")?;
|
|
let parts: Vec<&str> = content.trim().split_whitespace().collect();
|
|
|
|
if parts.len() < 3 {
|
|
return Err(CollectorError::Parse {
|
|
value: content,
|
|
error: "Expected at least 3 values in /proc/loadavg".to_string(),
|
|
});
|
|
}
|
|
|
|
let load_1min = utils::parse_f32(parts[0])?;
|
|
let load_5min = utils::parse_f32(parts[1])?;
|
|
let load_15min = utils::parse_f32(parts[2])?;
|
|
|
|
// Only apply thresholds to 5-minute load average
|
|
let load_1min_status = Status::Ok; // No alerting on 1min
|
|
let load_5min_status = self.calculate_load_status(registry::CPU_LOAD_5MIN, load_5min, status_tracker); // Only 5min triggers alerts
|
|
let load_15min_status = Status::Ok; // No alerting on 15min
|
|
|
|
Ok(vec![
|
|
Metric::new(
|
|
registry::CPU_LOAD_1MIN.to_string(),
|
|
MetricValue::Float(load_1min),
|
|
load_1min_status,
|
|
)
|
|
.with_description("CPU load average over 1 minute".to_string()),
|
|
Metric::new(
|
|
registry::CPU_LOAD_5MIN.to_string(),
|
|
MetricValue::Float(load_5min),
|
|
load_5min_status,
|
|
)
|
|
.with_description("CPU load average over 5 minutes".to_string()),
|
|
Metric::new(
|
|
registry::CPU_LOAD_15MIN.to_string(),
|
|
MetricValue::Float(load_15min),
|
|
load_15min_status,
|
|
)
|
|
.with_description("CPU load average over 15 minutes".to_string()),
|
|
])
|
|
}
|
|
|
|
/// Collect CPU temperature from thermal zones
|
|
/// Prioritizes x86_pkg_temp over generic thermal zones (legacy behavior)
|
|
async fn collect_temperature(&self, status_tracker: &mut StatusTracker) -> Result<Option<Metric>, CollectorError> {
|
|
// Try x86_pkg_temp first (Intel CPU package temperature)
|
|
if let Ok(temp) = self
|
|
.read_thermal_zone("/sys/class/thermal/thermal_zone0/temp")
|
|
.await
|
|
{
|
|
let temp_celsius = temp as f32 / 1000.0;
|
|
let status = self.calculate_temperature_status(registry::CPU_TEMPERATURE_CELSIUS, temp_celsius, status_tracker);
|
|
|
|
return Ok(Some(
|
|
Metric::new(
|
|
registry::CPU_TEMPERATURE_CELSIUS.to_string(),
|
|
MetricValue::Float(temp_celsius),
|
|
status,
|
|
)
|
|
.with_description("CPU package temperature".to_string())
|
|
.with_unit("°C".to_string()),
|
|
));
|
|
}
|
|
|
|
// Fallback: try other thermal zones
|
|
for zone_id in 0..10 {
|
|
let path = format!("/sys/class/thermal/thermal_zone{}/temp", zone_id);
|
|
if let Ok(temp) = self.read_thermal_zone(&path).await {
|
|
let temp_celsius = temp as f32 / 1000.0;
|
|
let status = self.calculate_temperature_status(registry::CPU_TEMPERATURE_CELSIUS, temp_celsius, status_tracker);
|
|
|
|
return Ok(Some(
|
|
Metric::new(
|
|
registry::CPU_TEMPERATURE_CELSIUS.to_string(),
|
|
MetricValue::Float(temp_celsius),
|
|
status,
|
|
)
|
|
.with_description(format!("CPU temperature from thermal_zone{}", zone_id))
|
|
.with_unit("°C".to_string()),
|
|
));
|
|
}
|
|
}
|
|
|
|
debug!("No CPU temperature sensors found");
|
|
Ok(None)
|
|
}
|
|
|
|
/// Read temperature from thermal zone efficiently
|
|
async fn read_thermal_zone(&self, path: &str) -> Result<u64, CollectorError> {
|
|
let content = utils::read_proc_file(path)?;
|
|
utils::parse_u64(content.trim())
|
|
}
|
|
|
|
/// Collect CPU frequency from /proc/cpuinfo or scaling governor
|
|
async fn collect_frequency(&self) -> Result<Option<Metric>, CollectorError> {
|
|
// Try scaling frequency first (more accurate for current frequency)
|
|
if let Ok(freq) =
|
|
utils::read_proc_file("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq")
|
|
{
|
|
if let Ok(freq_khz) = utils::parse_u64(freq.trim()) {
|
|
let freq_mhz = freq_khz as f32 / 1000.0;
|
|
|
|
return Ok(Some(
|
|
Metric::new(
|
|
registry::CPU_FREQUENCY_MHZ.to_string(),
|
|
MetricValue::Float(freq_mhz),
|
|
Status::Ok, // Frequency doesn't have status thresholds
|
|
)
|
|
.with_description("Current CPU frequency".to_string())
|
|
.with_unit("MHz".to_string()),
|
|
));
|
|
}
|
|
}
|
|
|
|
// Fallback: parse /proc/cpuinfo for base frequency
|
|
if let Ok(content) = utils::read_proc_file("/proc/cpuinfo") {
|
|
for line in content.lines() {
|
|
if line.starts_with("cpu MHz") {
|
|
if let Some(freq_str) = line.split(':').nth(1) {
|
|
if let Ok(freq_mhz) = utils::parse_f32(freq_str) {
|
|
return Ok(Some(
|
|
Metric::new(
|
|
registry::CPU_FREQUENCY_MHZ.to_string(),
|
|
MetricValue::Float(freq_mhz),
|
|
Status::Ok,
|
|
)
|
|
.with_description(
|
|
"CPU base frequency from /proc/cpuinfo".to_string(),
|
|
)
|
|
.with_unit("MHz".to_string()),
|
|
));
|
|
}
|
|
}
|
|
break; // Only need first CPU entry
|
|
}
|
|
}
|
|
}
|
|
|
|
debug!("CPU frequency not available");
|
|
Ok(None)
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Collector for CpuCollector {
|
|
|
|
async fn collect(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
|
|
debug!("Collecting CPU metrics");
|
|
let start = std::time::Instant::now();
|
|
|
|
let mut metrics = Vec::with_capacity(5); // Pre-allocate for efficiency
|
|
|
|
// Collect load averages (always available)
|
|
metrics.extend(self.collect_load_averages(status_tracker).await?);
|
|
|
|
// Collect temperature (optional)
|
|
if let Some(temp_metric) = self.collect_temperature(status_tracker).await? {
|
|
metrics.push(temp_metric);
|
|
}
|
|
|
|
// Collect frequency (optional)
|
|
if let Some(freq_metric) = self.collect_frequency().await? {
|
|
metrics.push(freq_metric);
|
|
}
|
|
|
|
let duration = start.elapsed();
|
|
debug!(
|
|
"CPU collection completed in {:?} with {} metrics",
|
|
duration,
|
|
metrics.len()
|
|
);
|
|
|
|
// Efficiency check: warn if collection takes too long
|
|
if duration.as_millis() > 1 {
|
|
debug!(
|
|
"CPU collection took {}ms - consider optimization",
|
|
duration.as_millis()
|
|
);
|
|
}
|
|
|
|
// Store performance metrics
|
|
// Performance tracking handled by cache system
|
|
|
|
Ok(metrics)
|
|
}
|
|
|
|
}
|