Fix dashboard connectivity by aggregating metric fragments

The issue was that the metric-level system was sending individual
metric fragments (CPU load and temperature as separate messages) instead
of the complete System/Service messages that the dashboard expects.

Now aggregates individual metrics into complete messages:
- CPU load + temperature -> complete System message
- Memory + processes -> complete System message
- Service metrics remain as complete messages

This should resolve 'No data received' on srv01 while maintaining
the 5-second CPU metric update frequency.
This commit is contained in:
Christoffer Martinsson 2025-10-16 00:25:23 +02:00
parent 3a959e55ed
commit 246973ebf6

View File

@ -3,6 +3,7 @@ use std::time::Duration;
use chrono::Utc; use chrono::Utc;
use gethostname::gethostname; use gethostname::gethostname;
use tokio::time::interval; use tokio::time::interval;
use serde_json::{Value, json};
use tracing::{info, error, warn, debug}; use tracing::{info, error, warn, debug};
use zmq::{Context, Socket, SocketType}; use zmq::{Context, Socket, SocketType};
@ -128,24 +129,32 @@ impl SmartAgent {
async fn collect_realtime_metrics(&mut self) { async fn collect_realtime_metrics(&mut self) {
info!("Collecting RealTime metrics (5s)..."); info!("Collecting RealTime metrics (5s)...");
// System CPU metrics // Collect and aggregate System metrics into complete message
match self.metric_manager.get_metric(&AgentType::System, "cpu_load").await { let mut system_data = json!({});
Ok(cpu_load) => {
info!("Successfully collected CPU load metric"); if let Ok(cpu_load) = self.metric_manager.get_metric(&AgentType::System, "cpu_load").await {
self.send_metric_data(&AgentType::System, &cpu_load).await; if let Some(obj) = cpu_load.as_object() {
for (key, value) in obj {
system_data[key] = value.clone();
}
} }
Err(e) => error!("Failed to collect CPU load metric: {}", e),
} }
match self.metric_manager.get_metric(&AgentType::System, "cpu_temperature").await { if let Ok(cpu_temp) = self.metric_manager.get_metric(&AgentType::System, "cpu_temperature").await {
Ok(cpu_temp) => { if let Some(obj) = cpu_temp.as_object() {
info!("Successfully collected CPU temperature metric"); for (key, value) in obj {
self.send_metric_data(&AgentType::System, &cpu_temp).await; system_data[key] = value.clone();
}
} }
Err(e) => error!("Failed to collect CPU temperature metric: {}", e),
} }
// Service CPU usage // Send complete System message if we have any data
if !system_data.as_object().unwrap().is_empty() {
info!("Sending aggregated System metrics");
self.send_metric_data(&AgentType::System, &system_data).await;
}
// Service CPU usage (complete message)
match self.metric_manager.get_metric(&AgentType::Service, "cpu_usage").await { match self.metric_manager.get_metric(&AgentType::Service, "cpu_usage").await {
Ok(service_cpu) => { Ok(service_cpu) => {
info!("Successfully collected Service CPU usage metric"); info!("Successfully collected Service CPU usage metric");
@ -159,25 +168,32 @@ impl SmartAgent {
async fn collect_fast_metrics(&mut self) { async fn collect_fast_metrics(&mut self) {
info!("Collecting Fast metrics (30s)..."); info!("Collecting Fast metrics (30s)...");
// System memory // Collect and aggregate System metrics into complete message
match self.metric_manager.get_metric(&AgentType::System, "memory").await { let mut system_data = json!({});
Ok(memory) => {
info!("Successfully collected System memory metric"); if let Ok(memory) = self.metric_manager.get_metric(&AgentType::System, "memory").await {
self.send_metric_data(&AgentType::System, &memory).await; if let Some(obj) = memory.as_object() {
for (key, value) in obj {
system_data[key] = value.clone();
}
} }
Err(e) => error!("Failed to collect System memory metric: {}", e),
} }
// Top processes if let Ok(processes) = self.metric_manager.get_metric(&AgentType::System, "top_processes").await {
match self.metric_manager.get_metric(&AgentType::System, "top_processes").await { if let Some(obj) = processes.as_object() {
Ok(processes) => { for (key, value) in obj {
info!("Successfully collected top processes metric"); system_data[key] = value.clone();
self.send_metric_data(&AgentType::System, &processes).await; }
} }
Err(e) => error!("Failed to collect top processes metric: {}", e),
} }
// Service memory usage // Send complete System message if we have any data
if !system_data.as_object().unwrap().is_empty() {
info!("Sending aggregated System metrics");
self.send_metric_data(&AgentType::System, &system_data).await;
}
// Service memory usage (complete message)
match self.metric_manager.get_metric(&AgentType::Service, "memory_usage").await { match self.metric_manager.get_metric(&AgentType::Service, "memory_usage").await {
Ok(service_memory) => { Ok(service_memory) => {
info!("Successfully collected Service memory usage metric"); info!("Successfully collected Service memory usage metric");