Implement heartbeat-based host connectivity detection
All checks were successful
Build and Release / build-and-release (push) Successful in 2m8s

- Add agent_heartbeat metric to agent transmission for reliable host detection
- Update dashboard to track heartbeat timestamps per host instead of general metrics
- Add configurable heartbeat_timeout_seconds to dashboard ZMQ config (default 10s)
- Remove unused timeout_ms from agent config and revert to non-blocking command reception
- Remove unused heartbeat_interval_ms from agent configuration
- Host disconnect detection now uses dedicated heartbeat metrics for improved reliability
- Bump version to 0.1.57
This commit is contained in:
2025-11-06 11:04:01 +01:00
parent 0e7cf24dbb
commit 5f6e47ece5
12 changed files with 52 additions and 29 deletions

View File

@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-agent"
version = "0.1.56"
version = "0.1.57"
edition = "2021"
[dependencies]

View File

@@ -180,6 +180,10 @@ impl Agent {
let version_metric = self.get_agent_version_metric();
metrics.push(version_metric);
// Add heartbeat metric for host connectivity detection
let heartbeat_metric = self.get_heartbeat_metric();
metrics.push(heartbeat_metric);
// Check for user-stopped services that are now active and clear their flags
self.clear_user_stopped_flags_for_active_services(&metrics);
@@ -232,6 +236,21 @@ impl Agent {
format!("v{}", env!("CARGO_PKG_VERSION"))
}
/// Create heartbeat metric for host connectivity detection
fn get_heartbeat_metric(&self) -> Metric {
use std::time::{SystemTime, UNIX_EPOCH};
let timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs();
Metric::new(
"agent_heartbeat".to_string(),
MetricValue::Integer(timestamp as i64),
Status::Ok,
)
}
async fn handle_commands(&mut self) -> Result<()> {
// Try to receive commands (non-blocking)

View File

@@ -66,8 +66,6 @@ impl ZmqHandler {
}
/// Send heartbeat (placeholder for future use)
/// Try to receive a command (non-blocking)
pub fn try_receive_command(&self) -> Result<Option<AgentCommand>> {
match self.command_receiver.recv_bytes(zmq::DONTWAIT) {

View File

@@ -28,8 +28,6 @@ pub struct ZmqConfig {
pub publisher_port: u16,
pub command_port: u16,
pub bind_address: String,
pub timeout_ms: u64,
pub heartbeat_interval_ms: u64,
pub transmission_interval_seconds: u64,
}

View File

@@ -19,10 +19,6 @@ pub fn validate_config(config: &AgentConfig) -> Result<()> {
bail!("ZMQ bind address cannot be empty");
}
if config.zmq.timeout_ms == 0 {
bail!("ZMQ timeout cannot be 0");
}
// Validate collection interval
if config.collection_interval_seconds == 0 {
bail!("Collection interval cannot be 0");