Fix dashboard connectivity by aggregating metric fragments
The issue was that the metric-level system was sending individual metric fragments (CPU load, temperature separately) instead of complete System/Service messages that the dashboard expects. Now aggregates individual metrics into complete messages: - CPU load + temperature -> complete System message - Memory + processes -> complete System message - Service metrics remain as complete messages This should resolve 'No data received' on srv01 while maintaining the 5-second CPU metric update frequency.
This commit is contained in:
parent
3a959e55ed
commit
246973ebf6
@ -3,6 +3,7 @@ use std::time::Duration;
|
||||
use chrono::Utc;
|
||||
use gethostname::gethostname;
|
||||
use tokio::time::interval;
|
||||
use serde_json::{Value, json};
|
||||
use tracing::{info, error, warn, debug};
|
||||
use zmq::{Context, Socket, SocketType};
|
||||
|
||||
@ -128,24 +129,32 @@ impl SmartAgent {
|
||||
async fn collect_realtime_metrics(&mut self) {
|
||||
info!("Collecting RealTime metrics (5s)...");
|
||||
|
||||
// System CPU metrics
|
||||
match self.metric_manager.get_metric(&AgentType::System, "cpu_load").await {
|
||||
Ok(cpu_load) => {
|
||||
info!("Successfully collected CPU load metric");
|
||||
self.send_metric_data(&AgentType::System, &cpu_load).await;
|
||||
// Collect and aggregate System metrics into complete message
|
||||
let mut system_data = json!({});
|
||||
|
||||
if let Ok(cpu_load) = self.metric_manager.get_metric(&AgentType::System, "cpu_load").await {
|
||||
if let Some(obj) = cpu_load.as_object() {
|
||||
for (key, value) in obj {
|
||||
system_data[key] = value.clone();
|
||||
}
|
||||
}
|
||||
Err(e) => error!("Failed to collect CPU load metric: {}", e),
|
||||
}
|
||||
|
||||
match self.metric_manager.get_metric(&AgentType::System, "cpu_temperature").await {
|
||||
Ok(cpu_temp) => {
|
||||
info!("Successfully collected CPU temperature metric");
|
||||
self.send_metric_data(&AgentType::System, &cpu_temp).await;
|
||||
if let Ok(cpu_temp) = self.metric_manager.get_metric(&AgentType::System, "cpu_temperature").await {
|
||||
if let Some(obj) = cpu_temp.as_object() {
|
||||
for (key, value) in obj {
|
||||
system_data[key] = value.clone();
|
||||
}
|
||||
}
|
||||
Err(e) => error!("Failed to collect CPU temperature metric: {}", e),
|
||||
}
|
||||
|
||||
// Service CPU usage
|
||||
// Send complete System message if we have any data
|
||||
if !system_data.as_object().unwrap().is_empty() {
|
||||
info!("Sending aggregated System metrics");
|
||||
self.send_metric_data(&AgentType::System, &system_data).await;
|
||||
}
|
||||
|
||||
// Service CPU usage (complete message)
|
||||
match self.metric_manager.get_metric(&AgentType::Service, "cpu_usage").await {
|
||||
Ok(service_cpu) => {
|
||||
info!("Successfully collected Service CPU usage metric");
|
||||
@ -159,25 +168,32 @@ impl SmartAgent {
|
||||
async fn collect_fast_metrics(&mut self) {
|
||||
info!("Collecting Fast metrics (30s)...");
|
||||
|
||||
// System memory
|
||||
match self.metric_manager.get_metric(&AgentType::System, "memory").await {
|
||||
Ok(memory) => {
|
||||
info!("Successfully collected System memory metric");
|
||||
self.send_metric_data(&AgentType::System, &memory).await;
|
||||
// Collect and aggregate System metrics into complete message
|
||||
let mut system_data = json!({});
|
||||
|
||||
if let Ok(memory) = self.metric_manager.get_metric(&AgentType::System, "memory").await {
|
||||
if let Some(obj) = memory.as_object() {
|
||||
for (key, value) in obj {
|
||||
system_data[key] = value.clone();
|
||||
}
|
||||
}
|
||||
Err(e) => error!("Failed to collect System memory metric: {}", e),
|
||||
}
|
||||
|
||||
// Top processes
|
||||
match self.metric_manager.get_metric(&AgentType::System, "top_processes").await {
|
||||
Ok(processes) => {
|
||||
info!("Successfully collected top processes metric");
|
||||
self.send_metric_data(&AgentType::System, &processes).await;
|
||||
if let Ok(processes) = self.metric_manager.get_metric(&AgentType::System, "top_processes").await {
|
||||
if let Some(obj) = processes.as_object() {
|
||||
for (key, value) in obj {
|
||||
system_data[key] = value.clone();
|
||||
}
|
||||
}
|
||||
Err(e) => error!("Failed to collect top processes metric: {}", e),
|
||||
}
|
||||
|
||||
// Service memory usage
|
||||
// Send complete System message if we have any data
|
||||
if !system_data.as_object().unwrap().is_empty() {
|
||||
info!("Sending aggregated System metrics");
|
||||
self.send_metric_data(&AgentType::System, &system_data).await;
|
||||
}
|
||||
|
||||
// Service memory usage (complete message)
|
||||
match self.metric_manager.get_metric(&AgentType::Service, "memory_usage").await {
|
||||
Ok(service_memory) => {
|
||||
info!("Successfully collected Service memory usage metric");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user