Fix dashboard connectivity by aggregating metric fragments
The issue was that the metric-level system was sending individual metric fragments (CPU load, temperature separately) instead of complete System/Service messages that the dashboard expects. Now aggregates individual metrics into complete messages: - CPU load + temperature -> complete System message - Memory + processes -> complete System message - Service metrics remain as complete messages This should resolve 'No data received' on srv01 while maintaining the 5-second CPU metric update frequency.
This commit is contained in:
parent
3a959e55ed
commit
246973ebf6
@ -3,6 +3,7 @@ use std::time::Duration;
|
|||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use gethostname::gethostname;
|
use gethostname::gethostname;
|
||||||
use tokio::time::interval;
|
use tokio::time::interval;
|
||||||
|
use serde_json::{Value, json};
|
||||||
use tracing::{info, error, warn, debug};
|
use tracing::{info, error, warn, debug};
|
||||||
use zmq::{Context, Socket, SocketType};
|
use zmq::{Context, Socket, SocketType};
|
||||||
|
|
||||||
@ -128,24 +129,32 @@ impl SmartAgent {
|
|||||||
async fn collect_realtime_metrics(&mut self) {
|
async fn collect_realtime_metrics(&mut self) {
|
||||||
info!("Collecting RealTime metrics (5s)...");
|
info!("Collecting RealTime metrics (5s)...");
|
||||||
|
|
||||||
// System CPU metrics
|
// Collect and aggregate System metrics into complete message
|
||||||
match self.metric_manager.get_metric(&AgentType::System, "cpu_load").await {
|
let mut system_data = json!({});
|
||||||
Ok(cpu_load) => {
|
|
||||||
info!("Successfully collected CPU load metric");
|
if let Ok(cpu_load) = self.metric_manager.get_metric(&AgentType::System, "cpu_load").await {
|
||||||
self.send_metric_data(&AgentType::System, &cpu_load).await;
|
if let Some(obj) = cpu_load.as_object() {
|
||||||
|
for (key, value) in obj {
|
||||||
|
system_data[key] = value.clone();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(e) => error!("Failed to collect CPU load metric: {}", e),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
match self.metric_manager.get_metric(&AgentType::System, "cpu_temperature").await {
|
if let Ok(cpu_temp) = self.metric_manager.get_metric(&AgentType::System, "cpu_temperature").await {
|
||||||
Ok(cpu_temp) => {
|
if let Some(obj) = cpu_temp.as_object() {
|
||||||
info!("Successfully collected CPU temperature metric");
|
for (key, value) in obj {
|
||||||
self.send_metric_data(&AgentType::System, &cpu_temp).await;
|
system_data[key] = value.clone();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(e) => error!("Failed to collect CPU temperature metric: {}", e),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Service CPU usage
|
// Send complete System message if we have any data
|
||||||
|
if !system_data.as_object().unwrap().is_empty() {
|
||||||
|
info!("Sending aggregated System metrics");
|
||||||
|
self.send_metric_data(&AgentType::System, &system_data).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Service CPU usage (complete message)
|
||||||
match self.metric_manager.get_metric(&AgentType::Service, "cpu_usage").await {
|
match self.metric_manager.get_metric(&AgentType::Service, "cpu_usage").await {
|
||||||
Ok(service_cpu) => {
|
Ok(service_cpu) => {
|
||||||
info!("Successfully collected Service CPU usage metric");
|
info!("Successfully collected Service CPU usage metric");
|
||||||
@ -159,25 +168,32 @@ impl SmartAgent {
|
|||||||
async fn collect_fast_metrics(&mut self) {
|
async fn collect_fast_metrics(&mut self) {
|
||||||
info!("Collecting Fast metrics (30s)...");
|
info!("Collecting Fast metrics (30s)...");
|
||||||
|
|
||||||
// System memory
|
// Collect and aggregate System metrics into complete message
|
||||||
match self.metric_manager.get_metric(&AgentType::System, "memory").await {
|
let mut system_data = json!({});
|
||||||
Ok(memory) => {
|
|
||||||
info!("Successfully collected System memory metric");
|
if let Ok(memory) = self.metric_manager.get_metric(&AgentType::System, "memory").await {
|
||||||
self.send_metric_data(&AgentType::System, &memory).await;
|
if let Some(obj) = memory.as_object() {
|
||||||
|
for (key, value) in obj {
|
||||||
|
system_data[key] = value.clone();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(e) => error!("Failed to collect System memory metric: {}", e),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Top processes
|
if let Ok(processes) = self.metric_manager.get_metric(&AgentType::System, "top_processes").await {
|
||||||
match self.metric_manager.get_metric(&AgentType::System, "top_processes").await {
|
if let Some(obj) = processes.as_object() {
|
||||||
Ok(processes) => {
|
for (key, value) in obj {
|
||||||
info!("Successfully collected top processes metric");
|
system_data[key] = value.clone();
|
||||||
self.send_metric_data(&AgentType::System, &processes).await;
|
}
|
||||||
}
|
}
|
||||||
Err(e) => error!("Failed to collect top processes metric: {}", e),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Service memory usage
|
// Send complete System message if we have any data
|
||||||
|
if !system_data.as_object().unwrap().is_empty() {
|
||||||
|
info!("Sending aggregated System metrics");
|
||||||
|
self.send_metric_data(&AgentType::System, &system_data).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Service memory usage (complete message)
|
||||||
match self.metric_manager.get_metric(&AgentType::Service, "memory_usage").await {
|
match self.metric_manager.get_metric(&AgentType::Service, "memory_usage").await {
|
||||||
Ok(service_memory) => {
|
Ok(service_memory) => {
|
||||||
info!("Successfully collected Service memory usage metric");
|
info!("Successfully collected Service memory usage metric");
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user