Remove blocking CollectNow commands to fix heartbeat stability
All checks were successful
Build and Release / build-and-release (push) Successful in 1m9s

Eliminates automatic CollectNow command sending on host connection which
was blocking the main message processing loop for up to 5 seconds per
command. Since agents transmit cached data every 2 seconds anyway, the
CollectNow optimization provided minimal benefit while causing heartbeat
detection issues. Also removes unused send_command wrapper method.

This should completely resolve intermittent host connection dropping.
This commit is contained in:
Christoffer Martinsson 2025-11-15 11:41:58 +01:00
parent ec460496d8
commit db0e41a7d3
5 changed files with 10 additions and 32 deletions

6
Cargo.lock generated
View File

@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
[[package]]
name = "cm-dashboard"
version = "0.1.69"
version = "0.1.72"
dependencies = [
"anyhow",
"chrono",
@ -292,7 +292,7 @@ dependencies = [
[[package]]
name = "cm-dashboard-agent"
version = "0.1.69"
version = "0.1.72"
dependencies = [
"anyhow",
"async-trait",
@ -315,7 +315,7 @@ dependencies = [
[[package]]
name = "cm-dashboard-shared"
version = "0.1.69"
version = "0.1.72"
dependencies = [
"chrono",
"serde",

View File

@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-agent"
version = "0.1.72"
version = "0.1.73"
edition = "2021"
[dependencies]

View File

@ -1,6 +1,6 @@
[package]
name = "cm-dashboard"
version = "0.1.72"
version = "0.1.73"
edition = "2021"
[dependencies]

View File

@ -134,12 +134,6 @@ impl Dashboard {
})
}
/// Send a command to a specific agent
pub async fn send_command(&mut self, hostname: &str, command: AgentCommand) -> Result<()> {
self.zmq_command_sender
.send_command(hostname, command)
.await
}
pub async fn run(&mut self) -> Result<()> {
info!("Starting dashboard main loop");
@ -212,34 +206,18 @@ impl Dashboard {
metric_message.metrics.len()
);
// Check if this is the first time we've seen this host
// Track first contact with host (no command needed - agent sends data every 2s)
let is_new_host = !self
.initial_commands_sent
.contains(&metric_message.hostname);
if is_new_host {
info!(
"First contact with host {}, sending initial CollectNow command",
"First contact with host {} - data will update automatically",
metric_message.hostname
);
// Send CollectNow command for immediate refresh
if let Err(e) = self
.send_command(&metric_message.hostname, AgentCommand::CollectNow)
.await
{
error!(
"Failed to send initial CollectNow command to {}: {}",
metric_message.hostname, e
);
} else {
info!(
"✓ Sent initial CollectNow command to {}",
metric_message.hostname
);
self.initial_commands_sent
.insert(metric_message.hostname.clone());
}
self.initial_commands_sent
.insert(metric_message.hostname.clone());
}
// Update metric store

View File

@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-shared"
version = "0.1.72"
version = "0.1.73"
edition = "2021"
[dependencies]