Compare commits

..

5 Commits

Author SHA1 Message Date
278d1763aa Fix Tab key responsiveness with immediate UI refresh
All checks were successful
Build and Release / build-and-release (push) Successful in 2m10s
- Add immediate terminal.draw() call after input handling in main loop
- Eliminate delay between Tab key press and visual host switching
- Provide instant visual feedback for all navigation inputs
- Maintain existing metric update render cycle without duplication
- Fix UI update timing issue where changes only appeared on metric intervals
- Bump version to 0.1.59
2025-11-06 11:30:26 +01:00
f874264e13 Optimize dashboard performance for responsive Tab key navigation
All checks were successful
Build and Release / build-and-release (push) Successful in 1m32s
- Replace 6 separate filter operations with single-pass metric categorization in update_metrics
- Reduce CPU overhead from 6x to 1x work per metric update cycle
- Fix Tab key sluggishness caused by competing expensive filtering operations
- Maintain exact same functionality with significantly better performance
- Improve UI responsiveness for host switching and navigation
- Bump version to 0.1.58
2025-11-06 11:18:39 +01:00
5f6e47ece5 Implement heartbeat-based host connectivity detection
All checks were successful
Build and Release / build-and-release (push) Successful in 2m8s
- Add agent_heartbeat metric to agent transmission for reliable host detection
- Update dashboard to track heartbeat timestamps per host instead of general metrics
- Add configurable heartbeat_timeout_seconds to dashboard ZMQ config (default 10s)
- Remove unused timeout_ms from agent config and revert to non-blocking command reception
- Remove unused heartbeat_interval_ms from agent configuration
- Host disconnect detection now uses dedicated heartbeat metrics for improved reliability
- Bump version to 0.1.57
2025-11-06 11:04:01 +01:00
0e7cf24dbb Add exclude_email_metrics configuration option
All checks were successful
Build and Release / build-and-release (push) Successful in 2m34s
- Add exclude_email_metrics field to AgentConfig for filtering email notifications
- Metrics matching excluded names skip notification processing but still appear in dashboard
- Optional field with serde(default) for backward compatibility
- Bump version to 0.1.56
2025-11-06 10:31:25 +01:00
2d080a2f51 Implement WakeOnLAN functionality and offline status handling
All checks were successful
Build and Release / build-and-release (push) Successful in 1m35s
- Add WakeOnLAN support for offline hosts using 'w' key
- Configure MAC addresses for all infrastructure hosts
- Implement Status::Offline for disconnected hosts
- Exclude offline hosts from status aggregation to prevent false alerts
- Update versions to 0.1.55
2025-10-31 09:28:31 +01:00
13 changed files with 104 additions and 71 deletions

6
Cargo.lock generated
View File

@@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
[[package]]
name = "cm-dashboard"
version = "0.1.54"
version = "0.1.58"
dependencies = [
"anyhow",
"chrono",
@@ -292,7 +292,7 @@ dependencies = [
[[package]]
name = "cm-dashboard-agent"
version = "0.1.54"
version = "0.1.58"
dependencies = [
"anyhow",
"async-trait",
@@ -315,7 +315,7 @@ dependencies = [
[[package]]
name = "cm-dashboard-shared"
version = "0.1.54"
version = "0.1.58"
dependencies = [
"chrono",
"serde",

View File

@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-agent"
version = "0.1.54"
version = "0.1.59"
edition = "2021"
[dependencies]

View File

@@ -180,6 +180,10 @@ impl Agent {
let version_metric = self.get_agent_version_metric();
metrics.push(version_metric);
// Add heartbeat metric for host connectivity detection
let heartbeat_metric = self.get_heartbeat_metric();
metrics.push(heartbeat_metric);
// Check for user-stopped services that are now active and clear their flags
self.clear_user_stopped_flags_for_active_services(&metrics);
@@ -201,6 +205,12 @@ impl Agent {
async fn process_metrics(&mut self, metrics: &[Metric]) -> bool {
let mut status_changed = false;
for metric in metrics {
// Filter excluded metrics from email notification processing only
if self.config.exclude_email_metrics.contains(&metric.name) {
debug!("Excluding metric '{}' from email notification processing", metric.name);
continue;
}
if self.host_status_manager.process_metric(metric, &mut self.notification_manager).await {
status_changed = true;
}
@@ -226,6 +236,22 @@ impl Agent {
format!("v{}", env!("CARGO_PKG_VERSION"))
}
/// Create heartbeat metric for host connectivity detection
fn get_heartbeat_metric(&self) -> Metric {
use std::time::{SystemTime, UNIX_EPOCH};
let timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs();
Metric::new(
"agent_heartbeat".to_string(),
MetricValue::Integer(timestamp as i64),
Status::Ok,
)
}
async fn handle_commands(&mut self) -> Result<()> {
// Try to receive commands (non-blocking)
match self.zmq_handler.try_receive_command() {

View File

@@ -66,8 +66,6 @@ impl ZmqHandler {
}
/// Send heartbeat (placeholder for future use)
/// Try to receive a command (non-blocking)
pub fn try_receive_command(&self) -> Result<Option<AgentCommand>> {
match self.command_receiver.recv_bytes(zmq::DONTWAIT) {

View File

@@ -17,6 +17,9 @@ pub struct AgentConfig {
pub notifications: NotificationConfig,
pub status_aggregation: HostStatusConfig,
pub collection_interval_seconds: u64,
/// List of metric names to exclude from email notifications
#[serde(default)]
pub exclude_email_metrics: Vec<String>,
}
/// ZMQ communication configuration
@@ -25,8 +28,6 @@ pub struct ZmqConfig {
pub publisher_port: u16,
pub command_port: u16,
pub bind_address: String,
pub timeout_ms: u64,
pub heartbeat_interval_ms: u64,
pub transmission_interval_seconds: u64,
}

View File

@@ -19,10 +19,6 @@ pub fn validate_config(config: &AgentConfig) -> Result<()> {
bail!("ZMQ bind address cannot be empty");
}
if config.zmq.timeout_ms == 0 {
bail!("ZMQ timeout cannot be 0");
}
// Validate collection interval
if config.collection_interval_seconds == 0 {
bail!("Collection interval cannot be 0");

View File

@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard"
version = "0.1.54"
version = "0.1.59"
edition = "2021"
[dependencies]

View File

@@ -22,7 +22,7 @@ pub struct Dashboard {
terminal: Option<Terminal<CrosstermBackend<io::Stdout>>>,
headless: bool,
initial_commands_sent: std::collections::HashSet<String>,
_config: DashboardConfig,
config: DashboardConfig,
}
impl Dashboard {
@@ -133,7 +133,7 @@ impl Dashboard {
terminal,
headless,
initial_commands_sent: std::collections::HashSet::new(),
_config: config,
config,
})
}
@@ -191,6 +191,17 @@ impl Dashboard {
break;
}
}
// Render UI immediately after handling input for responsive feedback
if let Some(ref mut terminal) = self.terminal {
if let Some(ref mut tui_app) = self.tui_app {
if let Err(e) = terminal.draw(|frame| {
tui_app.render(frame, &self.metric_store);
}) {
error!("Error rendering TUI after input: {}", e);
}
}
}
}
// Check for new metrics
@@ -247,7 +258,7 @@ impl Dashboard {
if let Some(ref mut tui_app) = self.tui_app {
let connected_hosts = self
.metric_store
.get_connected_hosts(Duration::from_secs(30));
.get_connected_hosts(Duration::from_secs(self.config.zmq.heartbeat_timeout_seconds));
tui_app.update_hosts(connected_hosts);

View File

@@ -141,9 +141,9 @@ impl ZmqConsumer {
}
}
/// Receive metrics from any connected agent (non-blocking)
/// Receive metrics from any connected agent (with timeout)
pub async fn receive_metrics(&mut self) -> Result<Option<MetricMessage>> {
match self.subscriber.recv_bytes(zmq::DONTWAIT) {
match self.subscriber.recv_bytes(0) {
Ok(data) => {
debug!("Received {} bytes from ZMQ", data.len());

View File

@@ -16,6 +16,13 @@ pub struct DashboardConfig {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZmqConfig {
pub subscriber_ports: Vec<u16>,
/// Heartbeat timeout in seconds - hosts considered offline if no heartbeat received within this time
#[serde(default = "default_heartbeat_timeout_seconds")]
pub heartbeat_timeout_seconds: u64,
}
fn default_heartbeat_timeout_seconds() -> u64 {
10 // Default to 10 seconds - allows for multiple missed heartbeats
}
/// Individual host configuration details

View File

@@ -11,8 +11,8 @@ pub struct MetricStore {
current_metrics: HashMap<String, HashMap<String, Metric>>,
/// Historical metrics for trending
historical_metrics: HashMap<String, Vec<MetricDataPoint>>,
/// Last update timestamp per host
last_update: HashMap<String, Instant>,
/// Last heartbeat timestamp per host
last_heartbeat: HashMap<String, Instant>,
/// Configuration
max_metrics_per_host: usize,
history_retention: Duration,
@@ -23,7 +23,7 @@ impl MetricStore {
Self {
current_metrics: HashMap::new(),
historical_metrics: HashMap::new(),
last_update: HashMap::new(),
last_heartbeat: HashMap::new(),
max_metrics_per_host,
history_retention: Duration::from_secs(history_retention_hours * 3600),
}
@@ -56,10 +56,13 @@ impl MetricStore {
// Add to history
host_history.push(MetricDataPoint { received_at: now });
}
// Update last update timestamp
self.last_update.insert(hostname.to_string(), now);
// Track heartbeat metrics for connectivity detection
if metric_name == "agent_heartbeat" {
self.last_heartbeat.insert(hostname.to_string(), now);
debug!("Updated heartbeat for host {}", hostname);
}
}
// Get metrics count before cleanup
let metrics_count = host_metrics.len();
@@ -88,16 +91,18 @@ impl MetricStore {
}
}
/// Get connected hosts (hosts with recent updates)
/// Get connected hosts (hosts with recent heartbeats)
pub fn get_connected_hosts(&self, timeout: Duration) -> Vec<String> {
let now = Instant::now();
self.last_update
self.last_heartbeat
.iter()
.filter_map(|(hostname, &last_update)| {
if now.duration_since(last_update) <= timeout {
.filter_map(|(hostname, &last_heartbeat)| {
if now.duration_since(last_heartbeat) <= timeout {
Some(hostname.clone())
} else {
debug!("Host {} considered offline - last heartbeat was {:?} ago",
hostname, now.duration_since(last_heartbeat));
None
}
})

View File

@@ -131,31 +131,31 @@ impl TuiApp {
// Only update widgets if we have metrics for this host
let all_metrics = metric_store.get_metrics_for_host(&hostname);
if !all_metrics.is_empty() {
// Get metrics first while hostname is borrowed
let cpu_metrics: Vec<&Metric> = all_metrics
.iter()
.filter(|m| {
m.name.starts_with("cpu_")
|| m.name.contains("c_state_")
|| m.name.starts_with("process_top_")
})
.copied()
.collect();
let memory_metrics: Vec<&Metric> = all_metrics
.iter()
.filter(|m| m.name.starts_with("memory_") || m.name.starts_with("disk_tmp_"))
.copied()
.collect();
let service_metrics: Vec<&Metric> = all_metrics
.iter()
.filter(|m| m.name.starts_with("service_"))
.copied()
.collect();
let all_backup_metrics: Vec<&Metric> = all_metrics
.iter()
.filter(|m| m.name.starts_with("backup_"))
.copied()
.collect();
// Single pass metric categorization for better performance
let mut cpu_metrics = Vec::new();
let mut memory_metrics = Vec::new();
let mut service_metrics = Vec::new();
let mut backup_metrics = Vec::new();
let mut nixos_metrics = Vec::new();
let mut disk_metrics = Vec::new();
for metric in all_metrics {
if metric.name.starts_with("cpu_")
|| metric.name.contains("c_state_")
|| metric.name.starts_with("process_top_") {
cpu_metrics.push(metric);
} else if metric.name.starts_with("memory_") || metric.name.starts_with("disk_tmp_") {
memory_metrics.push(metric);
} else if metric.name.starts_with("service_") {
service_metrics.push(metric);
} else if metric.name.starts_with("backup_") {
backup_metrics.push(metric);
} else if metric.name == "system_nixos_build" || metric.name == "system_active_users" || metric.name == "agent_version" {
nixos_metrics.push(metric);
} else if metric.name.starts_with("disk_") {
disk_metrics.push(metric);
}
}
// Clear completed transitions first
self.clear_completed_transitions(&hostname, &service_metrics);
@@ -166,21 +166,7 @@ impl TuiApp {
// Collect all system metrics (CPU, memory, NixOS, disk/storage)
let mut system_metrics = cpu_metrics;
system_metrics.extend(memory_metrics);
// Add NixOS metrics - using exact matching for build display fix
let nixos_metrics: Vec<&Metric> = all_metrics
.iter()
.filter(|m| m.name == "system_nixos_build" || m.name == "system_active_users" || m.name == "agent_version")
.copied()
.collect();
system_metrics.extend(nixos_metrics);
// Add disk/storage metrics
let disk_metrics: Vec<&Metric> = all_metrics
.iter()
.filter(|m| m.name.starts_with("disk_"))
.copied()
.collect();
system_metrics.extend(disk_metrics);
host_widgets.system_widget.update_from_metrics(&system_metrics);
@@ -189,7 +175,7 @@ impl TuiApp {
.update_from_metrics(&service_metrics);
host_widgets
.backup_widget
.update_from_metrics(&all_backup_metrics);
.update_from_metrics(&backup_metrics);
host_widgets.last_update = Some(Instant::now());
}
@@ -637,11 +623,14 @@ impl TuiApp {
return;
}
// Calculate worst-case status across all hosts
// Calculate worst-case status across all hosts (excluding offline)
let mut worst_status = Status::Ok;
for host in &self.available_hosts {
let host_status = self.calculate_host_status(host, metric_store);
worst_status = Status::aggregate(&[worst_status, host_status]);
// Don't include offline hosts in status aggregation
if host_status != Status::Offline {
worst_status = Status::aggregate(&[worst_status, host_status]);
}
}
// Use the worst status color as background

View File

@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-shared"
version = "0.1.54"
version = "0.1.59"
edition = "2021"
[dependencies]