Improve host disconnection detection and fix notification exclusions
All checks were successful
Build and Release / build-and-release (push) Successful in 1m34s

- Add dedicated heartbeat transmission every 5 seconds independent of metric collection
- Fix host offline detection by clearing metrics for disconnected hosts
- Move exclude_email_metrics to NotificationConfig for better organization
- Add cleanup_offline_hosts method to remove stale metrics after heartbeat timeout
- Ensure offline hosts show proper status icons and visual indicators

Version 0.1.63
This commit is contained in:
2025-11-08 11:33:32 +01:00
parent c980346d05
commit 0faed9309e
8 changed files with 66 additions and 15 deletions

View File

@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-agent"
version = "0.1.62"
version = "0.1.63"
edition = "2021"
[dependencies]

View File

@@ -78,10 +78,11 @@ impl Agent {
info!("Initial metric collection completed - all data cached and ready");
}
// Separate intervals for collection, transmission, and email notifications
// Separate intervals for collection, transmission, heartbeat, and email notifications
let mut collection_interval =
interval(Duration::from_secs(self.config.collection_interval_seconds));
let mut transmission_interval = interval(Duration::from_secs(self.config.zmq.transmission_interval_seconds));
let mut heartbeat_interval = interval(Duration::from_secs(self.config.zmq.heartbeat_interval_seconds));
let mut notification_interval = interval(Duration::from_secs(self.config.notifications.aggregation_interval_seconds));
loop {
@@ -98,6 +99,12 @@ impl Agent {
error!("Failed to broadcast metrics: {}", e);
}
}
_ = heartbeat_interval.tick() => {
// Send standalone heartbeat for host connectivity detection
if let Err(e) = self.send_heartbeat().await {
error!("Failed to send heartbeat: {}", e);
}
}
_ = notification_interval.tick() => {
// Process batched email notifications (separate from dashboard updates)
if let Err(e) = self.host_status_manager.process_pending_notifications(&mut self.notification_manager).await {
@@ -206,7 +213,7 @@ impl Agent {
let mut status_changed = false;
for metric in metrics {
// Filter excluded metrics from email notification processing only
if self.config.exclude_email_metrics.contains(&metric.name) {
if self.config.notifications.exclude_email_metrics.contains(&metric.name) {
debug!("Excluding metric '{}' from email notification processing", metric.name);
continue;
}
@@ -252,6 +259,19 @@ impl Agent {
)
}
/// Send standalone heartbeat for connectivity detection
async fn send_heartbeat(&mut self) -> Result<()> {
let heartbeat_metric = self.get_heartbeat_metric();
let message = MetricMessage::new(
self.hostname.clone(),
vec![heartbeat_metric],
);
self.zmq_handler.publish_metrics(&message).await?;
debug!("Sent standalone heartbeat for connectivity detection");
Ok(())
}
async fn handle_commands(&mut self) -> Result<()> {
// Try to receive commands (non-blocking)
match self.zmq_handler.try_receive_command() {

View File

@@ -17,9 +17,6 @@ pub struct AgentConfig {
pub notifications: NotificationConfig,
pub status_aggregation: HostStatusConfig,
pub collection_interval_seconds: u64,
/// List of metric names to exclude from email notifications
#[serde(default)]
pub exclude_email_metrics: Vec<String>,
}
/// ZMQ communication configuration
@@ -29,6 +26,9 @@ pub struct ZmqConfig {
pub command_port: u16,
pub bind_address: String,
pub transmission_interval_seconds: u64,
/// Heartbeat transmission interval in seconds for host connectivity detection
#[serde(default = "default_heartbeat_interval_seconds")]
pub heartbeat_interval_seconds: u64,
}
/// Collector configuration
@@ -147,9 +147,16 @@ pub struct NotificationConfig {
pub rate_limit_minutes: u64,
/// Email notification batching interval in seconds (default: 60)
pub aggregation_interval_seconds: u64,
/// List of metric names to exclude from email notifications
#[serde(default)]
pub exclude_email_metrics: Vec<String>,
}
fn default_heartbeat_interval_seconds() -> u64 {
5
}
impl AgentConfig {
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
loader::load_config(path)