Implement real-time process monitoring and fix UI hardcoded data
This commit addresses several key issues identified during development: Major Changes: - Replace hardcoded top CPU/RAM process display with real system data - Add intelligent process monitoring to CpuCollector using ps command - Fix disk metrics permission issues in systemd collector - Optimize service collection to focus on status, memory, and disk only - Update dashboard widgets to display live process information Process Monitoring Implementation: - Added collect_top_cpu_process() and collect_top_ram_process() methods - Implemented ps-based monitoring with accurate CPU percentages - Added filtering to prevent self-monitoring artifacts (ps commands) - Enhanced error handling and validation for process data - Dashboard now shows realistic values like "claude (PID 2974) 11.0%" Service Collection Optimization: - Removed CPU monitoring from systemd collector for efficiency - Enhanced service directory permission error logging - Simplified services widget to show essential metrics only - Fixed service-to-directory mapping accuracy UI and Dashboard Improvements: - Reorganized dashboard layout with btop-inspired multi-panel design - Updated system panel to include real top CPU/RAM process display - Enhanced widget formatting and data presentation - Removed placeholder/hardcoded data throughout the interface Technical Details: - Updated agent/src/collectors/cpu.rs with process monitoring - Modified dashboard/src/ui/mod.rs for real-time process display - Enhanced systemd collector error handling and disk metrics - Updated CLAUDE.md documentation with implementation details
This commit is contained in:
185
agent/src/metrics/mod.rs
Normal file
185
agent/src/metrics/mod.rs
Normal file
@@ -0,0 +1,185 @@
|
||||
use anyhow::Result;
|
||||
use cm_dashboard_shared::Metric;
|
||||
use std::collections::HashMap;
|
||||
use std::time::Instant;
|
||||
use tracing::{info, error, debug};
|
||||
|
||||
use crate::config::{CollectorConfig, AgentConfig};
|
||||
use crate::collectors::{Collector, cpu::CpuCollector, memory::MemoryCollector, disk::DiskCollector, systemd::SystemdCollector, cached_collector::CachedCollector};
|
||||
use crate::cache::MetricCacheManager;
|
||||
|
||||
/// Manages all metric collectors with intelligent caching
|
||||
pub struct MetricCollectionManager {
|
||||
collectors: Vec<Box<dyn Collector>>,
|
||||
cache_manager: MetricCacheManager,
|
||||
last_collection_times: HashMap<String, Instant>,
|
||||
}
|
||||
|
||||
impl MetricCollectionManager {
|
||||
pub async fn new(config: &CollectorConfig, agent_config: &AgentConfig) -> Result<Self> {
|
||||
let mut collectors: Vec<Box<dyn Collector>> = Vec::new();
|
||||
|
||||
// Benchmark mode - only enable specific collector based on env var
|
||||
let benchmark_mode = std::env::var("BENCHMARK_COLLECTOR").ok();
|
||||
|
||||
match benchmark_mode.as_deref() {
|
||||
Some("cpu") => {
|
||||
// CPU collector only
|
||||
if config.cpu.enabled {
|
||||
let cpu_collector = CpuCollector::new(config.cpu.clone());
|
||||
collectors.push(Box::new(cpu_collector));
|
||||
info!("BENCHMARK: CPU collector only");
|
||||
}
|
||||
},
|
||||
Some("memory") => {
|
||||
// Memory collector only
|
||||
if config.memory.enabled {
|
||||
let memory_collector = MemoryCollector::new(config.memory.clone());
|
||||
collectors.push(Box::new(memory_collector));
|
||||
info!("BENCHMARK: Memory collector only");
|
||||
}
|
||||
},
|
||||
Some("disk") => {
|
||||
// Disk collector only
|
||||
let disk_collector = DiskCollector::new();
|
||||
collectors.push(Box::new(disk_collector));
|
||||
info!("BENCHMARK: Disk collector only");
|
||||
},
|
||||
Some("systemd") => {
|
||||
// Systemd collector only
|
||||
let systemd_collector = SystemdCollector::new();
|
||||
collectors.push(Box::new(systemd_collector));
|
||||
info!("BENCHMARK: Systemd collector only");
|
||||
},
|
||||
Some("none") => {
|
||||
// No collectors - test agent loop only
|
||||
info!("BENCHMARK: No collectors enabled");
|
||||
},
|
||||
_ => {
|
||||
// Normal mode - all collectors
|
||||
if config.cpu.enabled {
|
||||
let cpu_collector = CpuCollector::new(config.cpu.clone());
|
||||
collectors.push(Box::new(cpu_collector));
|
||||
info!("CPU collector initialized");
|
||||
}
|
||||
|
||||
if config.memory.enabled {
|
||||
let memory_collector = MemoryCollector::new(config.memory.clone());
|
||||
collectors.push(Box::new(memory_collector));
|
||||
info!("Memory collector initialized");
|
||||
}
|
||||
|
||||
let disk_collector = DiskCollector::new();
|
||||
collectors.push(Box::new(disk_collector));
|
||||
info!("Disk collector initialized");
|
||||
|
||||
let systemd_collector = SystemdCollector::new();
|
||||
collectors.push(Box::new(systemd_collector));
|
||||
info!("Systemd collector initialized");
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize cache manager with configuration
|
||||
let cache_manager = MetricCacheManager::new(agent_config.cache.clone());
|
||||
|
||||
// Start background cache tasks
|
||||
cache_manager.start_background_tasks().await;
|
||||
|
||||
info!("Metric collection manager initialized with {} collectors and caching enabled", collectors.len());
|
||||
|
||||
Ok(Self {
|
||||
collectors,
|
||||
cache_manager,
|
||||
last_collection_times: HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Collect metrics from all collectors with intelligent caching
|
||||
pub async fn collect_all_metrics(&mut self) -> Result<Vec<Metric>> {
|
||||
let mut all_metrics = Vec::new();
|
||||
let now = Instant::now();
|
||||
|
||||
// Collecting metrics from collectors (debug logging disabled for performance)
|
||||
|
||||
// Keep track of which collector types we're collecting fresh data from
|
||||
let mut collecting_fresh = std::collections::HashSet::new();
|
||||
|
||||
// For each collector, check if we need to collect based on time intervals
|
||||
for collector in &self.collectors {
|
||||
let collector_name = collector.name();
|
||||
|
||||
// Determine cache interval for this collector type - ALL REALTIME FOR FAST UPDATES
|
||||
let cache_interval_secs = match collector_name {
|
||||
"cpu" | "memory" | "disk" | "systemd" => 2, // All realtime for fast updates
|
||||
_ => 2, // All realtime for fast updates
|
||||
};
|
||||
|
||||
let should_collect = if let Some(last_time) = self.last_collection_times.get(collector_name) {
|
||||
now.duration_since(*last_time).as_secs() >= cache_interval_secs
|
||||
} else {
|
||||
true // First collection
|
||||
};
|
||||
|
||||
if should_collect {
|
||||
collecting_fresh.insert(collector_name.to_string());
|
||||
match collector.collect().await {
|
||||
Ok(metrics) => {
|
||||
// Collector returned fresh metrics (debug logging disabled for performance)
|
||||
|
||||
// Cache all new metrics
|
||||
for metric in &metrics {
|
||||
self.cache_manager.cache_metric(metric.clone()).await;
|
||||
}
|
||||
|
||||
all_metrics.extend(metrics);
|
||||
self.last_collection_times.insert(collector_name.to_string(), now);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Collector '{}' failed: {}", collector_name, e);
|
||||
// Continue with other collectors even if one fails
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let elapsed = self.last_collection_times.get(collector_name)
|
||||
.map(|t| now.duration_since(*t).as_secs())
|
||||
.unwrap_or(0);
|
||||
// Collector skipped (debug logging disabled for performance)
|
||||
}
|
||||
}
|
||||
|
||||
// For 2-second intervals, skip cached metrics to avoid duplicates
|
||||
// (Cache system disabled for realtime updates)
|
||||
|
||||
// Collected metrics total (debug logging disabled for performance)
|
||||
Ok(all_metrics)
|
||||
}
|
||||
|
||||
/// Get names of all registered collectors
|
||||
pub fn get_collector_names(&self) -> Vec<String> {
|
||||
self.collectors.iter()
|
||||
.map(|c| c.name().to_string())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get collector statistics
|
||||
pub fn get_stats(&self) -> HashMap<String, bool> {
|
||||
self.collectors.iter()
|
||||
.map(|c| (c.name().to_string(), true)) // All collectors are enabled
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Determine which collector handles a specific metric
|
||||
fn get_collector_for_metric(&self, metric_name: &str) -> String {
|
||||
if metric_name.starts_with("cpu_") {
|
||||
"cpu".to_string()
|
||||
} else if metric_name.starts_with("memory_") {
|
||||
"memory".to_string()
|
||||
} else if metric_name.starts_with("disk_") {
|
||||
"disk".to_string()
|
||||
} else if metric_name.starts_with("service_") {
|
||||
"systemd".to_string()
|
||||
} else {
|
||||
"unknown".to_string()
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user