Implement real-time process monitoring and fix UI hardcoded data
This commit addresses several key issues identified during development: Major Changes: - Replace hardcoded top CPU/RAM process display with real system data - Add intelligent process monitoring to CpuCollector using ps command - Fix disk metrics permission issues in systemd collector - Optimize service collection to focus on status, memory, and disk only - Update dashboard widgets to display live process information Process Monitoring Implementation: - Added collect_top_cpu_process() and collect_top_ram_process() methods - Implemented ps-based monitoring with accurate CPU percentages - Added filtering to prevent self-monitoring artifacts (ps commands) - Enhanced error handling and validation for process data - Dashboard now shows realistic values like "claude (PID 2974) 11.0%" Service Collection Optimization: - Removed CPU monitoring from systemd collector for efficiency - Enhanced service directory permission error logging - Simplified services widget to show essential metrics only - Fixed service-to-directory mapping accuracy UI and Dashboard Improvements: - Reorganized dashboard layout with btop-inspired multi-panel design - Updated system panel to include real top CPU/RAM process display - Enhanced widget formatting and data presentation - Removed placeholder/hardcoded data throughout the interface Technical Details: - Updated agent/src/collectors/cpu.rs with process monitoring - Modified dashboard/src/ui/mod.rs for real-time process display - Enhanced systemd collector error handling and disk metrics - Updated CLAUDE.md documentation with implementation details
This commit is contained in:
204
dashboard/src/communication/mod.rs
Normal file
204
dashboard/src/communication/mod.rs
Normal file
@@ -0,0 +1,204 @@
|
||||
use anyhow::Result;
|
||||
use cm_dashboard_shared::{MetricMessage, MessageEnvelope, MessageType};
|
||||
use tracing::{info, error, debug, warn};
|
||||
use zmq::{Context, Socket, SocketType};
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::config::ZmqConfig;
|
||||
|
||||
/// Commands that can be sent to agents
|
||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||
pub enum AgentCommand {
|
||||
/// Request immediate metric collection
|
||||
CollectNow,
|
||||
/// Change collection interval
|
||||
SetInterval { seconds: u64 },
|
||||
/// Enable/disable a collector
|
||||
ToggleCollector { name: String, enabled: bool },
|
||||
/// Request status/health check
|
||||
Ping,
|
||||
}
|
||||
|
||||
/// ZMQ consumer for receiving metrics from agents
|
||||
pub struct ZmqConsumer {
|
||||
subscriber: Socket,
|
||||
config: ZmqConfig,
|
||||
connected_hosts: std::collections::HashSet<String>,
|
||||
}
|
||||
|
||||
impl ZmqConsumer {
|
||||
pub async fn new(config: &ZmqConfig) -> Result<Self> {
|
||||
let context = Context::new();
|
||||
|
||||
// Create subscriber socket
|
||||
let subscriber = context.socket(SocketType::SUB)?;
|
||||
|
||||
// Set socket options
|
||||
subscriber.set_rcvtimeo(1000)?; // 1 second timeout for non-blocking receives
|
||||
subscriber.set_subscribe(b"")?; // Subscribe to all messages
|
||||
|
||||
info!("ZMQ consumer initialized");
|
||||
|
||||
Ok(Self {
|
||||
subscriber,
|
||||
config: config.clone(),
|
||||
connected_hosts: std::collections::HashSet::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Connect to a specific host's agent
|
||||
pub async fn connect_to_host(&mut self, hostname: &str, port: u16) -> Result<()> {
|
||||
let address = format!("tcp://{}:{}", hostname, port);
|
||||
|
||||
match self.subscriber.connect(&address) {
|
||||
Ok(()) => {
|
||||
info!("Connected to agent at {}", address);
|
||||
self.connected_hosts.insert(hostname.to_string());
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to connect to agent at {}: {}", address, e);
|
||||
Err(anyhow::anyhow!("Failed to connect to {}: {}", address, e))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Connect to predefined hosts
|
||||
pub async fn connect_to_predefined_hosts(&mut self, hosts: &[String]) -> Result<()> {
|
||||
let default_port = self.config.subscriber_ports[0];
|
||||
|
||||
for hostname in hosts {
|
||||
// Try to connect, but don't fail if some hosts are unreachable
|
||||
if let Err(e) = self.connect_to_host(hostname, default_port).await {
|
||||
warn!("Could not connect to {}: {}", hostname, e);
|
||||
}
|
||||
}
|
||||
|
||||
info!("Connected to {} out of {} configured hosts",
|
||||
self.connected_hosts.len(), hosts.len());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get list of newly connected hosts since last check
|
||||
pub fn get_newly_connected_hosts(&self) -> Vec<String> {
|
||||
// For now, return all connected hosts (could be enhanced with state tracking)
|
||||
self.connected_hosts.iter().cloned().collect()
|
||||
}
|
||||
|
||||
/// Receive metrics from any connected agent (non-blocking)
|
||||
pub async fn receive_metrics(&mut self) -> Result<Option<MetricMessage>> {
|
||||
match self.subscriber.recv_bytes(zmq::DONTWAIT) {
|
||||
Ok(data) => {
|
||||
debug!("Received {} bytes from ZMQ", data.len());
|
||||
|
||||
// Deserialize envelope
|
||||
let envelope: MessageEnvelope = serde_json::from_slice(&data)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to deserialize envelope: {}", e))?;
|
||||
|
||||
// Check message type
|
||||
match envelope.message_type {
|
||||
MessageType::Metrics => {
|
||||
let metrics = envelope.decode_metrics()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to decode metrics: {}", e))?;
|
||||
|
||||
debug!("Received {} metrics from {}",
|
||||
metrics.metrics.len(), metrics.hostname);
|
||||
|
||||
Ok(Some(metrics))
|
||||
}
|
||||
MessageType::Heartbeat => {
|
||||
debug!("Received heartbeat");
|
||||
Ok(None) // Don't return heartbeats as metrics
|
||||
}
|
||||
_ => {
|
||||
debug!("Received non-metrics message: {:?}", envelope.message_type);
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(zmq::Error::EAGAIN) => {
|
||||
// No message available (non-blocking mode)
|
||||
Ok(None)
|
||||
}
|
||||
Err(e) => {
|
||||
error!("ZMQ receive error: {}", e);
|
||||
Err(anyhow::anyhow!("ZMQ receive error: {}", e))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get list of connected hosts
|
||||
pub fn get_connected_hosts(&self) -> Vec<String> {
|
||||
self.connected_hosts.iter().cloned().collect()
|
||||
}
|
||||
|
||||
/// Check if connected to any hosts
|
||||
pub fn has_connections(&self) -> bool {
|
||||
!self.connected_hosts.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// ZMQ command sender for sending commands to agents
|
||||
pub struct ZmqCommandSender {
|
||||
context: Context,
|
||||
config: ZmqConfig,
|
||||
}
|
||||
|
||||
impl ZmqCommandSender {
|
||||
pub fn new(config: &ZmqConfig) -> Result<Self> {
|
||||
let context = Context::new();
|
||||
|
||||
info!("ZMQ command sender initialized");
|
||||
|
||||
Ok(Self {
|
||||
context,
|
||||
config: config.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Send a command to a specific agent
|
||||
pub async fn send_command(&self, hostname: &str, command: AgentCommand) -> Result<()> {
|
||||
// Create a new PUSH socket for this command (ZMQ best practice)
|
||||
let socket = self.context.socket(SocketType::PUSH)?;
|
||||
|
||||
// Set socket options
|
||||
socket.set_linger(1000)?; // Wait up to 1 second on close
|
||||
socket.set_sndtimeo(5000)?; // 5 second send timeout
|
||||
|
||||
// Connect to agent's command port (6131)
|
||||
let address = format!("tcp://{}:6131", hostname);
|
||||
socket.connect(&address)?;
|
||||
|
||||
// Serialize command
|
||||
let serialized = serde_json::to_vec(&command)?;
|
||||
|
||||
// Send command
|
||||
socket.send(&serialized, 0)?;
|
||||
|
||||
info!("Sent command {:?} to agent at {}", command, hostname);
|
||||
|
||||
// Socket will be automatically closed when dropped
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Send a command to all connected hosts
|
||||
pub async fn broadcast_command(&self, hosts: &[String], command: AgentCommand) -> Result<Vec<String>> {
|
||||
let mut failed_hosts = Vec::new();
|
||||
|
||||
for hostname in hosts {
|
||||
if let Err(e) = self.send_command(hostname, command.clone()).await {
|
||||
error!("Failed to send command to {}: {}", hostname, e);
|
||||
failed_hosts.push(hostname.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if failed_hosts.is_empty() {
|
||||
info!("Successfully broadcast command {:?} to {} hosts", command, hosts.len());
|
||||
} else {
|
||||
warn!("Failed to send command to {} hosts: {:?}", failed_hosts.len(), failed_hosts);
|
||||
}
|
||||
|
||||
Ok(failed_hosts)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user