Implement real-time process monitoring and fix UI hardcoded data
This commit addresses several key issues identified during development: Major Changes: - Replace hardcoded top CPU/RAM process display with real system data - Add intelligent process monitoring to CpuCollector using ps command - Fix disk metrics permission issues in systemd collector - Optimize service collection to focus on status, memory, and disk only - Update dashboard widgets to display live process information Process Monitoring Implementation: - Added collect_top_cpu_process() and collect_top_ram_process() methods - Implemented ps-based monitoring with accurate CPU percentages - Added filtering to prevent self-monitoring artifacts (ps commands) - Enhanced error handling and validation for process data - Dashboard now shows realistic values like "claude (PID 2974) 11.0%" Service Collection Optimization: - Removed CPU monitoring from systemd collector for efficiency - Enhanced service directory permission error logging - Simplified services widget to show essential metrics only - Fixed service-to-directory mapping accuracy UI and Dashboard Improvements: - Reorganized dashboard layout with btop-inspired multi-panel design - Updated system panel to include real top CPU/RAM process display - Enhanced widget formatting and data presentation - Removed placeholder/hardcoded data throughout the interface Technical Details: - Updated agent/src/collectors/cpu.rs with process monitoring - Modified dashboard/src/ui/mod.rs for real-time process display - Enhanced systemd collector error handling and disk metrics - Updated CLAUDE.md documentation with implementation details
This commit is contained in:
173
agent/src/collectors/disk.rs
Normal file
173
agent/src/collectors/disk.rs
Normal file
@@ -0,0 +1,173 @@
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use cm_dashboard_shared::{Metric, MetricValue, Status};
|
||||
use std::process::Command;
|
||||
use std::time::Instant;
|
||||
use tracing::debug;
|
||||
|
||||
use super::{Collector, CollectorError, PerformanceMetrics};
|
||||
|
||||
/// Disk usage collector for monitoring filesystem sizes
|
||||
pub struct DiskCollector {
|
||||
// Immutable collector for caching compatibility
|
||||
}
|
||||
|
||||
impl DiskCollector {
|
||||
pub fn new() -> Self {
|
||||
Self {}
|
||||
}
|
||||
|
||||
/// Get directory size using du command (efficient for single directory)
|
||||
fn get_directory_size(&self, path: &str) -> Result<u64> {
|
||||
let output = Command::new("du")
|
||||
.arg("-s")
|
||||
.arg("--block-size=1")
|
||||
.arg(path)
|
||||
.output()?;
|
||||
|
||||
// du returns success even with permission denied warnings in stderr
|
||||
// We only care if the command completely failed or produced no stdout
|
||||
let output_str = String::from_utf8(output.stdout)?;
|
||||
|
||||
if output_str.trim().is_empty() {
|
||||
return Err(anyhow::anyhow!("du command produced no output for {}", path));
|
||||
}
|
||||
|
||||
let size_str = output_str
|
||||
.split_whitespace()
|
||||
.next()
|
||||
.ok_or_else(|| anyhow::anyhow!("Failed to parse du output"))?;
|
||||
|
||||
let size_bytes = size_str.parse::<u64>()?;
|
||||
Ok(size_bytes)
|
||||
}
|
||||
|
||||
/// Get filesystem info using df command
|
||||
fn get_filesystem_info(&self, path: &str) -> Result<(u64, u64)> {
|
||||
let output = Command::new("df")
|
||||
.arg("--block-size=1")
|
||||
.arg(path)
|
||||
.output()?;
|
||||
|
||||
if !output.status.success() {
|
||||
return Err(anyhow::anyhow!("df command failed for {}", path));
|
||||
}
|
||||
|
||||
let output_str = String::from_utf8(output.stdout)?;
|
||||
let lines: Vec<&str> = output_str.lines().collect();
|
||||
|
||||
if lines.len() < 2 {
|
||||
return Err(anyhow::anyhow!("Unexpected df output format"));
|
||||
}
|
||||
|
||||
let fields: Vec<&str> = lines[1].split_whitespace().collect();
|
||||
if fields.len() < 4 {
|
||||
return Err(anyhow::anyhow!("Unexpected df fields count"));
|
||||
}
|
||||
|
||||
let total_bytes = fields[1].parse::<u64>()?;
|
||||
let used_bytes = fields[2].parse::<u64>()?;
|
||||
|
||||
Ok((total_bytes, used_bytes))
|
||||
}
|
||||
|
||||
/// Calculate status based on usage percentage
|
||||
fn calculate_usage_status(&self, used_bytes: u64, total_bytes: u64) -> Status {
|
||||
if total_bytes == 0 {
|
||||
return Status::Unknown;
|
||||
}
|
||||
|
||||
let usage_percent = (used_bytes as f64 / total_bytes as f64) * 100.0;
|
||||
|
||||
// Thresholds for disk usage
|
||||
if usage_percent >= 95.0 {
|
||||
Status::Critical
|
||||
} else if usage_percent >= 85.0 {
|
||||
Status::Warning
|
||||
} else {
|
||||
Status::Ok
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Collector for DiskCollector {
|
||||
fn name(&self) -> &str {
|
||||
"disk"
|
||||
}
|
||||
|
||||
async fn collect(&self) -> Result<Vec<Metric>, CollectorError> {
|
||||
let start_time = Instant::now();
|
||||
debug!("Collecting disk metrics");
|
||||
|
||||
let mut metrics = Vec::new();
|
||||
|
||||
// Monitor /tmp directory size
|
||||
match self.get_directory_size("/tmp") {
|
||||
Ok(tmp_size_bytes) => {
|
||||
let tmp_size_mb = tmp_size_bytes as f64 / (1024.0 * 1024.0);
|
||||
|
||||
// Get /tmp filesystem info (usually tmpfs with 2GB limit)
|
||||
let (total_bytes, _) = match self.get_filesystem_info("/tmp") {
|
||||
Ok((total, used)) => (total, used),
|
||||
Err(_) => {
|
||||
// Fallback: assume 2GB limit for tmpfs
|
||||
(2 * 1024 * 1024 * 1024, tmp_size_bytes)
|
||||
}
|
||||
};
|
||||
|
||||
let total_mb = total_bytes as f64 / (1024.0 * 1024.0);
|
||||
let usage_percent = (tmp_size_bytes as f64 / total_bytes as f64) * 100.0;
|
||||
let status = self.calculate_usage_status(tmp_size_bytes, total_bytes);
|
||||
|
||||
metrics.push(Metric {
|
||||
name: "disk_tmp_size_mb".to_string(),
|
||||
value: MetricValue::Float(tmp_size_mb as f32),
|
||||
unit: Some("MB".to_string()),
|
||||
description: Some(format!("Used: {:.1} MB", tmp_size_mb)),
|
||||
status,
|
||||
timestamp: chrono::Utc::now().timestamp() as u64,
|
||||
});
|
||||
|
||||
metrics.push(Metric {
|
||||
name: "disk_tmp_total_mb".to_string(),
|
||||
value: MetricValue::Float(total_mb as f32),
|
||||
unit: Some("MB".to_string()),
|
||||
description: Some(format!("Total: {:.1} MB", total_mb)),
|
||||
status: Status::Ok,
|
||||
timestamp: chrono::Utc::now().timestamp() as u64,
|
||||
});
|
||||
|
||||
metrics.push(Metric {
|
||||
name: "disk_tmp_usage_percent".to_string(),
|
||||
value: MetricValue::Float(usage_percent as f32),
|
||||
unit: Some("%".to_string()),
|
||||
description: Some(format!("Usage: {:.1}%", usage_percent)),
|
||||
status,
|
||||
timestamp: chrono::Utc::now().timestamp() as u64,
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("Failed to get /tmp size: {}", e);
|
||||
metrics.push(Metric {
|
||||
name: "disk_tmp_size_mb".to_string(),
|
||||
value: MetricValue::String("error".to_string()),
|
||||
unit: Some("MB".to_string()),
|
||||
description: Some(format!("Error: {}", e)),
|
||||
status: Status::Unknown,
|
||||
timestamp: chrono::Utc::now().timestamp() as u64,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let collection_time = start_time.elapsed();
|
||||
debug!("Disk collection completed in {:?} with {} metrics",
|
||||
collection_time, metrics.len());
|
||||
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
fn get_performance_metrics(&self) -> Option<PerformanceMetrics> {
|
||||
None // Performance tracking handled by cache system
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user