Implement real-time process monitoring and fix UI hardcoded data
This commit addresses several key issues identified during development:

Major Changes:
- Replace hardcoded top CPU/RAM process display with real system data
- Add intelligent process monitoring to CpuCollector using ps command
- Fix disk metrics permission issues in systemd collector
- Optimize service collection to focus on status, memory, and disk only
- Update dashboard widgets to display live process information

Process Monitoring Implementation:
- Added collect_top_cpu_process() and collect_top_ram_process() methods
- Implemented ps-based monitoring with accurate CPU percentages
- Added filtering to prevent self-monitoring artifacts (ps commands)
- Enhanced error handling and validation for process data
- Dashboard now shows realistic values like "claude (PID 2974) 11.0%"

Service Collection Optimization:
- Removed CPU monitoring from systemd collector for efficiency
- Enhanced service directory permission error logging
- Simplified services widget to show essential metrics only
- Fixed service-to-directory mapping accuracy

UI and Dashboard Improvements:
- Reorganized dashboard layout with btop-inspired multi-panel design
- Updated system panel to include real top CPU/RAM process display
- Enhanced widget formatting and data presentation
- Removed placeholder/hardcoded data throughout the interface

Technical Details:
- Updated agent/src/collectors/cpu.rs with process monitoring
- Modified dashboard/src/ui/mod.rs for real-time process display
- Enhanced systemd collector error handling and disk metrics
- Updated CLAUDE.md documentation with implementation details
This commit is contained in:
230
dashboard/src/metrics/store.rs
Normal file
230
dashboard/src/metrics/store.rs
Normal file
@@ -0,0 +1,230 @@
|
||||
use cm_dashboard_shared::{Metric, Status};
|
||||
use std::collections::HashMap;
|
||||
use std::time::{Duration, Instant};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use super::{MetricDataPoint, WidgetType, subscriptions};
|
||||
|
||||
/// Central metric storage for the dashboard
|
||||
pub struct MetricStore {
|
||||
/// Current metrics: hostname -> metric_name -> metric
|
||||
current_metrics: HashMap<String, HashMap<String, Metric>>,
|
||||
/// Historical metrics for trending
|
||||
historical_metrics: HashMap<String, Vec<MetricDataPoint>>,
|
||||
/// Last update timestamp per host
|
||||
last_update: HashMap<String, Instant>,
|
||||
/// Configuration
|
||||
max_metrics_per_host: usize,
|
||||
history_retention: Duration,
|
||||
}
|
||||
|
||||
impl MetricStore {
|
||||
pub fn new(max_metrics_per_host: usize, history_retention_hours: u64) -> Self {
|
||||
Self {
|
||||
current_metrics: HashMap::new(),
|
||||
historical_metrics: HashMap::new(),
|
||||
last_update: HashMap::new(),
|
||||
max_metrics_per_host,
|
||||
history_retention: Duration::from_secs(history_retention_hours * 3600),
|
||||
}
|
||||
}
|
||||
|
||||
/// Update metrics for a specific host
|
||||
pub fn update_metrics(&mut self, hostname: &str, metrics: Vec<Metric>) {
|
||||
let now = Instant::now();
|
||||
|
||||
debug!("Updating {} metrics for host {}", metrics.len(), hostname);
|
||||
|
||||
// Get or create host entry
|
||||
let host_metrics = self.current_metrics
|
||||
.entry(hostname.to_string())
|
||||
.or_insert_with(HashMap::new);
|
||||
|
||||
// Get or create historical entry
|
||||
let host_history = self.historical_metrics
|
||||
.entry(hostname.to_string())
|
||||
.or_insert_with(Vec::new);
|
||||
|
||||
// Update current metrics and add to history
|
||||
for metric in metrics {
|
||||
let metric_name = metric.name.clone();
|
||||
|
||||
// Store current metric
|
||||
host_metrics.insert(metric_name.clone(), metric.clone());
|
||||
|
||||
// Add to history
|
||||
host_history.push(MetricDataPoint {
|
||||
metric,
|
||||
received_at: now,
|
||||
});
|
||||
}
|
||||
|
||||
// Update last update timestamp
|
||||
self.last_update.insert(hostname.to_string(), now);
|
||||
|
||||
// Get metrics count before cleanup
|
||||
let metrics_count = host_metrics.len();
|
||||
|
||||
// Cleanup old history and enforce limits
|
||||
self.cleanup_host_data(hostname);
|
||||
|
||||
info!("Updated metrics for {}: {} current metrics",
|
||||
hostname, metrics_count);
|
||||
}
|
||||
|
||||
/// Get current metric for a specific host
|
||||
pub fn get_metric(&self, hostname: &str, metric_name: &str) -> Option<&Metric> {
|
||||
self.current_metrics
|
||||
.get(hostname)?
|
||||
.get(metric_name)
|
||||
}
|
||||
|
||||
/// Get all current metrics for a host
|
||||
pub fn get_host_metrics(&self, hostname: &str) -> Option<&HashMap<String, Metric>> {
|
||||
self.current_metrics.get(hostname)
|
||||
}
|
||||
|
||||
/// Get all current metrics for a host as a vector
|
||||
pub fn get_metrics_for_host(&self, hostname: &str) -> Vec<&Metric> {
|
||||
if let Some(metrics_map) = self.current_metrics.get(hostname) {
|
||||
metrics_map.values().collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Get metrics for a specific widget type
|
||||
pub fn get_metrics_for_widget(&self, hostname: &str, widget_type: WidgetType) -> Vec<&Metric> {
|
||||
let subscriptions = subscriptions::get_widget_subscriptions(widget_type);
|
||||
|
||||
if let Some(host_metrics) = self.get_host_metrics(hostname) {
|
||||
subscriptions
|
||||
.iter()
|
||||
.filter_map(|&metric_name| host_metrics.get(metric_name))
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Get aggregated status for a widget
|
||||
pub fn get_widget_status(&self, hostname: &str, widget_type: WidgetType) -> Status {
|
||||
let metrics = self.get_metrics_for_widget(hostname, widget_type);
|
||||
|
||||
if metrics.is_empty() {
|
||||
Status::Unknown
|
||||
} else {
|
||||
let statuses: Vec<Status> = metrics.iter().map(|m| m.status).collect();
|
||||
Status::aggregate(&statuses)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get list of all hosts with metrics
|
||||
pub fn get_hosts(&self) -> Vec<String> {
|
||||
self.current_metrics.keys().cloned().collect()
|
||||
}
|
||||
|
||||
/// Get connected hosts (hosts with recent updates)
|
||||
pub fn get_connected_hosts(&self, timeout: Duration) -> Vec<String> {
|
||||
let now = Instant::now();
|
||||
|
||||
self.last_update
|
||||
.iter()
|
||||
.filter_map(|(hostname, &last_update)| {
|
||||
if now.duration_since(last_update) <= timeout {
|
||||
Some(hostname.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get last update timestamp for a host
|
||||
pub fn get_last_update(&self, hostname: &str) -> Option<Instant> {
|
||||
self.last_update.get(hostname).copied()
|
||||
}
|
||||
|
||||
/// Check if host is considered connected
|
||||
pub fn is_host_connected(&self, hostname: &str, timeout: Duration) -> bool {
|
||||
if let Some(&last_update) = self.last_update.get(hostname) {
|
||||
Instant::now().duration_since(last_update) <= timeout
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Get metric value as specific type (helper function)
|
||||
pub fn get_metric_value_f32(&self, hostname: &str, metric_name: &str) -> Option<f32> {
|
||||
self.get_metric(hostname, metric_name)?
|
||||
.value
|
||||
.as_f32()
|
||||
}
|
||||
|
||||
/// Get metric value as string (helper function)
|
||||
pub fn get_metric_value_string(&self, hostname: &str, metric_name: &str) -> Option<String> {
|
||||
Some(self.get_metric(hostname, metric_name)?
|
||||
.value
|
||||
.as_string())
|
||||
}
|
||||
|
||||
/// Get historical data for a metric
|
||||
pub fn get_metric_history(&self, hostname: &str, metric_name: &str) -> Vec<&MetricDataPoint> {
|
||||
if let Some(history) = self.historical_metrics.get(hostname) {
|
||||
history
|
||||
.iter()
|
||||
.filter(|dp| dp.metric.name == metric_name)
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Cleanup old data and enforce limits
|
||||
fn cleanup_host_data(&mut self, hostname: &str) {
|
||||
let now = Instant::now();
|
||||
|
||||
// Cleanup historical data
|
||||
if let Some(history) = self.historical_metrics.get_mut(hostname) {
|
||||
// Remove old entries
|
||||
history.retain(|dp| now.duration_since(dp.received_at) <= self.history_retention);
|
||||
|
||||
// Enforce size limit
|
||||
if history.len() > self.max_metrics_per_host {
|
||||
let excess = history.len() - self.max_metrics_per_host;
|
||||
history.drain(0..excess);
|
||||
warn!("Trimmed {} old metrics for host {} (size limit: {})",
|
||||
excess, hostname, self.max_metrics_per_host);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get storage statistics
|
||||
pub fn get_stats(&self) -> MetricStoreStats {
|
||||
let total_current_metrics: usize = self.current_metrics
|
||||
.values()
|
||||
.map(|host_metrics| host_metrics.len())
|
||||
.sum();
|
||||
|
||||
let total_historical_metrics: usize = self.historical_metrics
|
||||
.values()
|
||||
.map(|history| history.len())
|
||||
.sum();
|
||||
|
||||
MetricStoreStats {
|
||||
total_hosts: self.current_metrics.len(),
|
||||
total_current_metrics,
|
||||
total_historical_metrics,
|
||||
connected_hosts: self.get_connected_hosts(Duration::from_secs(30)).len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Point-in-time counters describing the contents of a metric store.
///
/// All fields are plain counts, so the type derives `Default` (all zeros) and
/// equality in addition to `Debug` and `Clone`; this lets callers compare
/// snapshots directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MetricStoreStats {
    /// Number of hosts that have stored metrics.
    pub total_hosts: usize,
    /// Number of current metrics summed over all hosts.
    pub total_current_metrics: usize,
    /// Number of history entries summed over all hosts.
    pub total_historical_metrics: usize,
    /// Number of hosts counted as connected when the snapshot was taken.
    pub connected_hosts: usize,
}
|
||||
Reference in New Issue
Block a user