Add detailed error logging for metric collection debugging

Added per-metric logging to identify why metrics are not being collected
successfully: each collection now logs an info message on success and an
error message with the underlying cause on failure. This will help diagnose
the 'No data received' issue on srv01 by showing exactly which metrics are
failing and why.
This commit is contained in:
Christoffer Martinsson 2025-10-15 23:29:42 +02:00
parent 10aa72816d
commit 6bc2ffd94b

View File

@ -129,17 +129,29 @@ impl SmartAgent {
info!("Collecting RealTime metrics (5s)...");
// System CPU metrics
if let Ok(cpu_load) = self.metric_manager.get_metric(&AgentType::System, "cpu_load").await {
self.send_metric_data(&AgentType::System, &cpu_load).await;
match self.metric_manager.get_metric(&AgentType::System, "cpu_load").await {
Ok(cpu_load) => {
info!("Successfully collected CPU load metric");
self.send_metric_data(&AgentType::System, &cpu_load).await;
}
Err(e) => error!("Failed to collect CPU load metric: {}", e),
}
if let Ok(cpu_temp) = self.metric_manager.get_metric(&AgentType::System, "cpu_temperature").await {
self.send_metric_data(&AgentType::System, &cpu_temp).await;
match self.metric_manager.get_metric(&AgentType::System, "cpu_temperature").await {
Ok(cpu_temp) => {
info!("Successfully collected CPU temperature metric");
self.send_metric_data(&AgentType::System, &cpu_temp).await;
}
Err(e) => error!("Failed to collect CPU temperature metric: {}", e),
}
// Service CPU usage
if let Ok(service_cpu) = self.metric_manager.get_metric(&AgentType::Service, "cpu_usage").await {
self.send_metric_data(&AgentType::Service, &service_cpu).await;
match self.metric_manager.get_metric(&AgentType::Service, "cpu_usage").await {
Ok(service_cpu) => {
info!("Successfully collected Service CPU usage metric");
self.send_metric_data(&AgentType::Service, &service_cpu).await;
}
Err(e) => error!("Failed to collect Service CPU usage metric: {}", e),
}
}
@ -148,18 +160,30 @@ impl SmartAgent {
info!("Collecting Fast metrics (30s)...");
// System memory
if let Ok(memory) = self.metric_manager.get_metric(&AgentType::System, "memory").await {
self.send_metric_data(&AgentType::System, &memory).await;
match self.metric_manager.get_metric(&AgentType::System, "memory").await {
Ok(memory) => {
info!("Successfully collected System memory metric");
self.send_metric_data(&AgentType::System, &memory).await;
}
Err(e) => error!("Failed to collect System memory metric: {}", e),
}
// Top processes
if let Ok(processes) = self.metric_manager.get_metric(&AgentType::System, "top_processes").await {
self.send_metric_data(&AgentType::System, &processes).await;
match self.metric_manager.get_metric(&AgentType::System, "top_processes").await {
Ok(processes) => {
info!("Successfully collected top processes metric");
self.send_metric_data(&AgentType::System, &processes).await;
}
Err(e) => error!("Failed to collect top processes metric: {}", e),
}
// Service memory usage
if let Ok(service_memory) = self.metric_manager.get_metric(&AgentType::Service, "memory_usage").await {
self.send_metric_data(&AgentType::Service, &service_memory).await;
match self.metric_manager.get_metric(&AgentType::Service, "memory_usage").await {
Ok(service_memory) => {
info!("Successfully collected Service memory usage metric");
self.send_metric_data(&AgentType::Service, &service_memory).await;
}
Err(e) => error!("Failed to collect Service memory usage metric: {}", e),
}
}