Fix dashboard connectivity by aggregating metric fragments

The issue was that the metric-level system was sending individual
metric fragments (CPU load, temperature separately) instead of
complete System/Service messages that the dashboard expects.

Now aggregates individual metrics into complete messages:
- CPU load + temperature -> complete System message
- Memory + processes -> complete System message
- Service metrics remain as complete messages

This should resolve 'No data received' on srv01 while maintaining
the 5-second CPU metric update frequency.
This commit is contained in:
Christoffer Martinsson 2025-10-16 00:25:23 +02:00
parent 3a959e55ed
commit 246973ebf6

View File

@ -3,6 +3,7 @@ use std::time::Duration;
use chrono::Utc;
use gethostname::gethostname;
use tokio::time::interval;
use serde_json::{Value, json};
use tracing::{info, error, warn, debug};
use zmq::{Context, Socket, SocketType};
@ -128,24 +129,32 @@ impl SmartAgent {
async fn collect_realtime_metrics(&mut self) {
info!("Collecting RealTime metrics (5s)...");
// System CPU metrics
match self.metric_manager.get_metric(&AgentType::System, "cpu_load").await {
Ok(cpu_load) => {
info!("Successfully collected CPU load metric");
self.send_metric_data(&AgentType::System, &cpu_load).await;
// Collect and aggregate System metrics into complete message
let mut system_data = json!({});
if let Ok(cpu_load) = self.metric_manager.get_metric(&AgentType::System, "cpu_load").await {
if let Some(obj) = cpu_load.as_object() {
for (key, value) in obj {
system_data[key] = value.clone();
}
}
Err(e) => error!("Failed to collect CPU load metric: {}", e),
}
match self.metric_manager.get_metric(&AgentType::System, "cpu_temperature").await {
Ok(cpu_temp) => {
info!("Successfully collected CPU temperature metric");
self.send_metric_data(&AgentType::System, &cpu_temp).await;
if let Ok(cpu_temp) = self.metric_manager.get_metric(&AgentType::System, "cpu_temperature").await {
if let Some(obj) = cpu_temp.as_object() {
for (key, value) in obj {
system_data[key] = value.clone();
}
}
Err(e) => error!("Failed to collect CPU temperature metric: {}", e),
}
// Service CPU usage
// Send complete System message if we have any data
if !system_data.as_object().unwrap().is_empty() {
info!("Sending aggregated System metrics");
self.send_metric_data(&AgentType::System, &system_data).await;
}
// Service CPU usage (complete message)
match self.metric_manager.get_metric(&AgentType::Service, "cpu_usage").await {
Ok(service_cpu) => {
info!("Successfully collected Service CPU usage metric");
@ -159,25 +168,32 @@ impl SmartAgent {
async fn collect_fast_metrics(&mut self) {
info!("Collecting Fast metrics (30s)...");
// System memory
match self.metric_manager.get_metric(&AgentType::System, "memory").await {
Ok(memory) => {
info!("Successfully collected System memory metric");
self.send_metric_data(&AgentType::System, &memory).await;
// Collect and aggregate System metrics into complete message
let mut system_data = json!({});
if let Ok(memory) = self.metric_manager.get_metric(&AgentType::System, "memory").await {
if let Some(obj) = memory.as_object() {
for (key, value) in obj {
system_data[key] = value.clone();
}
}
Err(e) => error!("Failed to collect System memory metric: {}", e),
}
// Top processes
match self.metric_manager.get_metric(&AgentType::System, "top_processes").await {
Ok(processes) => {
info!("Successfully collected top processes metric");
self.send_metric_data(&AgentType::System, &processes).await;
if let Ok(processes) = self.metric_manager.get_metric(&AgentType::System, "top_processes").await {
if let Some(obj) = processes.as_object() {
for (key, value) in obj {
system_data[key] = value.clone();
}
}
Err(e) => error!("Failed to collect top processes metric: {}", e),
}
// Service memory usage
// Send complete System message if we have any data
if !system_data.as_object().unwrap().is_empty() {
info!("Sending aggregated System metrics");
self.send_metric_data(&AgentType::System, &system_data).await;
}
// Service memory usage (complete message)
match self.metric_manager.get_metric(&AgentType::Service, "memory_usage").await {
Ok(service_memory) => {
info!("Successfully collected Service memory usage metric");