Implement complete structured data architecture
All checks were successful
Build and Release / build-and-release (push) Successful in 2m10s
All checks were successful
Build and Release / build-and-release (push) Successful in 2m10s
Replace fragile string-based metrics with type-safe JSON data structures. The agent converts all metrics to structured data, and the dashboard processes typed fields.

Changes:
- Add AgentData struct with CPU, memory, storage, services, and backup fields
- Replace string parsing with direct field access throughout the system
- Maintain UI compatibility via a temporary metric bridge conversion
- Fix NVMe temperature display and eliminate string parsing bugs
- Update protocol to support structured data transmission over ZMQ
- Comprehensive metric type coverage: CPU, memory, storage, services, backup

Version bump to 0.1.131.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.130"
|
||||
version = "0.1.131"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -185,42 +185,35 @@ impl Dashboard {
|
||||
|
||||
// Check for new metrics
|
||||
if last_metrics_check.elapsed() >= metrics_check_interval {
|
||||
if let Ok(Some(metric_message)) = self.zmq_consumer.receive_metrics().await {
|
||||
if let Ok(Some(agent_data)) = self.zmq_consumer.receive_agent_data().await {
|
||||
debug!(
|
||||
"Received metrics from {}: {} metrics",
|
||||
metric_message.hostname,
|
||||
metric_message.metrics.len()
|
||||
"Received agent data from {}",
|
||||
agent_data.hostname
|
||||
);
|
||||
|
||||
// Track first contact with host (no command needed - agent sends data every 2s)
|
||||
let is_new_host = !self
|
||||
.initial_commands_sent
|
||||
.contains(&metric_message.hostname);
|
||||
.contains(&agent_data.hostname);
|
||||
|
||||
if is_new_host {
|
||||
info!(
|
||||
"First contact with host {} - data will update automatically",
|
||||
metric_message.hostname
|
||||
agent_data.hostname
|
||||
);
|
||||
self.initial_commands_sent
|
||||
.insert(metric_message.hostname.clone());
|
||||
.insert(agent_data.hostname.clone());
|
||||
}
|
||||
|
||||
// Show raw data if requested (before processing)
|
||||
if self.raw_data {
|
||||
println!("RAW METRICS FROM {}: {} metrics", metric_message.hostname, metric_message.metrics.len());
|
||||
for metric in &metric_message.metrics {
|
||||
println!(" {}: {:?} ({:?})", metric.name, metric.value, metric.status);
|
||||
if let Some(desc) = &metric.description {
|
||||
println!(" └─ {}", desc);
|
||||
}
|
||||
}
|
||||
println!("RAW AGENT DATA FROM {}:", agent_data.hostname);
|
||||
println!("{}", serde_json::to_string_pretty(&agent_data).unwrap_or_else(|e| format!("Serialization error: {}", e)));
|
||||
println!("{}", "─".repeat(80));
|
||||
}
|
||||
|
||||
// Update metric store
|
||||
self.metric_store
|
||||
.update_metrics(&metric_message.hostname, metric_message.metrics);
|
||||
// Update data store
|
||||
self.metric_store.process_agent_data(agent_data);
|
||||
|
||||
// Check for agent version mismatches across hosts
|
||||
if let Some((current_version, outdated_hosts)) = self.metric_store.get_version_mismatches() {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use anyhow::Result;
|
||||
use cm_dashboard_shared::{CommandOutputMessage, MessageEnvelope, MessageType, MetricMessage};
|
||||
use cm_dashboard_shared::{AgentData, CommandOutputMessage, MessageEnvelope, MessageType};
|
||||
use tracing::{debug, error, info, warn};
|
||||
use zmq::{Context, Socket, SocketType};
|
||||
|
||||
@@ -117,8 +117,8 @@ impl ZmqConsumer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Receive metrics from any connected agent (non-blocking)
|
||||
pub async fn receive_metrics(&mut self) -> Result<Option<MetricMessage>> {
|
||||
/// Receive agent data (non-blocking)
|
||||
pub async fn receive_agent_data(&mut self) -> Result<Option<AgentData>> {
|
||||
match self.subscriber.recv_bytes(zmq::DONTWAIT) {
|
||||
Ok(data) => {
|
||||
debug!("Received {} bytes from ZMQ", data.len());
|
||||
@@ -129,29 +129,27 @@ impl ZmqConsumer {
|
||||
|
||||
// Check message type
|
||||
match envelope.message_type {
|
||||
MessageType::Metrics => {
|
||||
let metrics = envelope
|
||||
.decode_metrics()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to decode metrics: {}", e))?;
|
||||
MessageType::AgentData => {
|
||||
let agent_data = envelope
|
||||
.decode_agent_data()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to decode agent data: {}", e))?;
|
||||
|
||||
debug!(
|
||||
"Received {} metrics from {}",
|
||||
metrics.metrics.len(),
|
||||
metrics.hostname
|
||||
"Received agent data from host {}",
|
||||
agent_data.hostname
|
||||
);
|
||||
|
||||
Ok(Some(metrics))
|
||||
Ok(Some(agent_data))
|
||||
}
|
||||
MessageType::Heartbeat => {
|
||||
debug!("Received heartbeat");
|
||||
Ok(None) // Don't return heartbeats as metrics
|
||||
Ok(None) // Don't return heartbeats
|
||||
}
|
||||
MessageType::CommandOutput => {
|
||||
debug!("Received command output (will be handled by receive_command_output)");
|
||||
Ok(None) // Command output handled by separate method
|
||||
}
|
||||
_ => {
|
||||
debug!("Received non-metrics message: {:?}", envelope.message_type);
|
||||
debug!("Received unsupported message: {:?}", envelope.message_type);
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
@@ -166,5 +164,6 @@ impl ZmqConsumer {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use cm_dashboard_shared::Metric;
|
||||
use cm_dashboard_shared::{AgentData, Metric};
|
||||
use std::collections::HashMap;
|
||||
use std::time::{Duration, Instant};
|
||||
use tracing::{debug, info, warn};
|
||||
@@ -76,6 +76,286 @@ impl MetricStore {
|
||||
);
|
||||
}
|
||||
|
||||
/// Process structured agent data (temporary bridge - converts back to metrics)
|
||||
/// TODO: Replace entire metric system with direct structured data processing
|
||||
pub fn process_agent_data(&mut self, agent_data: AgentData) {
|
||||
let metrics = self.convert_agent_data_to_metrics(&agent_data);
|
||||
self.update_metrics(&agent_data.hostname, metrics);
|
||||
}
|
||||
|
||||
/// Convert structured agent data to legacy metrics (temporary bridge)
|
||||
fn convert_agent_data_to_metrics(&self, agent_data: &AgentData) -> Vec<Metric> {
|
||||
use cm_dashboard_shared::{Metric, MetricValue, Status};
|
||||
|
||||
let mut metrics = Vec::new();
|
||||
|
||||
// Convert CPU data
|
||||
metrics.push(Metric::new(
|
||||
"cpu_load_1min".to_string(),
|
||||
MetricValue::Float(agent_data.system.cpu.load_1min),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
"cpu_load_5min".to_string(),
|
||||
MetricValue::Float(agent_data.system.cpu.load_5min),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
"cpu_load_15min".to_string(),
|
||||
MetricValue::Float(agent_data.system.cpu.load_15min),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
"cpu_frequency_mhz".to_string(),
|
||||
MetricValue::Float(agent_data.system.cpu.frequency_mhz),
|
||||
Status::Ok,
|
||||
));
|
||||
if let Some(temp) = agent_data.system.cpu.temperature_celsius {
|
||||
metrics.push(Metric::new(
|
||||
"cpu_temperature_celsius".to_string(),
|
||||
MetricValue::Float(temp),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
|
||||
// Convert Memory data
|
||||
metrics.push(Metric::new(
|
||||
"memory_usage_percent".to_string(),
|
||||
MetricValue::Float(agent_data.system.memory.usage_percent),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
"memory_total_gb".to_string(),
|
||||
MetricValue::Float(agent_data.system.memory.total_gb),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
"memory_used_gb".to_string(),
|
||||
MetricValue::Float(agent_data.system.memory.used_gb),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
"memory_available_gb".to_string(),
|
||||
MetricValue::Float(agent_data.system.memory.available_gb),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
"memory_swap_total_gb".to_string(),
|
||||
MetricValue::Float(agent_data.system.memory.swap_total_gb),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
"memory_swap_used_gb".to_string(),
|
||||
MetricValue::Float(agent_data.system.memory.swap_used_gb),
|
||||
Status::Ok,
|
||||
));
|
||||
|
||||
// Convert tmpfs data
|
||||
for tmpfs in &agent_data.system.memory.tmpfs {
|
||||
if tmpfs.mount == "/tmp" {
|
||||
metrics.push(Metric::new(
|
||||
"memory_tmp_usage_percent".to_string(),
|
||||
MetricValue::Float(tmpfs.usage_percent),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
"memory_tmp_used_gb".to_string(),
|
||||
MetricValue::Float(tmpfs.used_gb),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
"memory_tmp_total_gb".to_string(),
|
||||
MetricValue::Float(tmpfs.total_gb),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Add agent metadata
|
||||
metrics.push(Metric::new(
|
||||
"agent_version".to_string(),
|
||||
MetricValue::String(agent_data.agent_version.clone()),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
"agent_heartbeat".to_string(),
|
||||
MetricValue::Integer(agent_data.timestamp as i64),
|
||||
Status::Ok,
|
||||
));
|
||||
|
||||
// Convert storage data
|
||||
for drive in &agent_data.system.storage.drives {
|
||||
// Drive-level metrics
|
||||
if let Some(temp) = drive.temperature_celsius {
|
||||
metrics.push(Metric::new(
|
||||
format!("disk_{}_temperature", drive.name),
|
||||
MetricValue::Float(temp),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
if let Some(wear) = drive.wear_percent {
|
||||
metrics.push(Metric::new(
|
||||
format!("disk_{}_wear_percent", drive.name),
|
||||
MetricValue::Float(wear),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
metrics.push(Metric::new(
|
||||
format!("disk_{}_health", drive.name),
|
||||
MetricValue::String(drive.health.clone()),
|
||||
Status::Ok,
|
||||
));
|
||||
|
||||
// Filesystem metrics
|
||||
for fs in &drive.filesystems {
|
||||
let fs_base = format!("disk_{}_fs_{}", drive.name, fs.mount.replace('/', "root"));
|
||||
metrics.push(Metric::new(
|
||||
format!("{}_usage_percent", fs_base),
|
||||
MetricValue::Float(fs.usage_percent),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
format!("{}_used_gb", fs_base),
|
||||
MetricValue::Float(fs.used_gb),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
format!("{}_total_gb", fs_base),
|
||||
MetricValue::Float(fs.total_gb),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Convert storage pools
|
||||
for pool in &agent_data.system.storage.pools {
|
||||
let pool_base = format!("disk_{}", pool.name);
|
||||
metrics.push(Metric::new(
|
||||
format!("{}_usage_percent", pool_base),
|
||||
MetricValue::Float(pool.usage_percent),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
format!("{}_used_gb", pool_base),
|
||||
MetricValue::Float(pool.used_gb),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
format!("{}_total_gb", pool_base),
|
||||
MetricValue::Float(pool.total_gb),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
format!("{}_pool_type", pool_base),
|
||||
MetricValue::String(pool.pool_type.clone()),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
format!("{}_mount_point", pool_base),
|
||||
MetricValue::String(pool.mount.clone()),
|
||||
Status::Ok,
|
||||
));
|
||||
|
||||
// Pool drive data
|
||||
for drive in &pool.data_drives {
|
||||
if let Some(temp) = drive.temperature_celsius {
|
||||
metrics.push(Metric::new(
|
||||
format!("disk_{}_{}_temperature", pool.name, drive.name),
|
||||
MetricValue::Float(temp),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
if let Some(wear) = drive.wear_percent {
|
||||
metrics.push(Metric::new(
|
||||
format!("disk_{}_{}_wear_percent", pool.name, drive.name),
|
||||
MetricValue::Float(wear),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
}
|
||||
for drive in &pool.parity_drives {
|
||||
if let Some(temp) = drive.temperature_celsius {
|
||||
metrics.push(Metric::new(
|
||||
format!("disk_{}_{}_temperature", pool.name, drive.name),
|
||||
MetricValue::Float(temp),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
if let Some(wear) = drive.wear_percent {
|
||||
metrics.push(Metric::new(
|
||||
format!("disk_{}_{}_wear_percent", pool.name, drive.name),
|
||||
MetricValue::Float(wear),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert service data
|
||||
for service in &agent_data.services {
|
||||
let service_base = format!("service_{}", service.name);
|
||||
metrics.push(Metric::new(
|
||||
format!("{}_status", service_base),
|
||||
MetricValue::String(service.status.clone()),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
format!("{}_memory_mb", service_base),
|
||||
MetricValue::Float(service.memory_mb),
|
||||
Status::Ok,
|
||||
));
|
||||
metrics.push(Metric::new(
|
||||
format!("{}_disk_gb", service_base),
|
||||
MetricValue::Float(service.disk_gb),
|
||||
Status::Ok,
|
||||
));
|
||||
if service.user_stopped {
|
||||
metrics.push(Metric::new(
|
||||
format!("{}_user_stopped", service_base),
|
||||
MetricValue::Boolean(true),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Convert backup data
|
||||
metrics.push(Metric::new(
|
||||
"backup_status".to_string(),
|
||||
MetricValue::String(agent_data.backup.status.clone()),
|
||||
Status::Ok,
|
||||
));
|
||||
if let Some(last_run) = agent_data.backup.last_run {
|
||||
metrics.push(Metric::new(
|
||||
"backup_last_run_timestamp".to_string(),
|
||||
MetricValue::Integer(last_run as i64),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
if let Some(next_scheduled) = agent_data.backup.next_scheduled {
|
||||
metrics.push(Metric::new(
|
||||
"backup_next_scheduled_timestamp".to_string(),
|
||||
MetricValue::Integer(next_scheduled as i64),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
if let Some(size) = agent_data.backup.total_size_gb {
|
||||
metrics.push(Metric::new(
|
||||
"backup_size_gb".to_string(),
|
||||
MetricValue::Float(size),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
if let Some(health) = &agent_data.backup.repository_health {
|
||||
metrics.push(Metric::new(
|
||||
"backup_repository_health".to_string(),
|
||||
MetricValue::String(health.clone()),
|
||||
Status::Ok,
|
||||
));
|
||||
}
|
||||
|
||||
metrics
|
||||
}
|
||||
|
||||
/// Get current metric for a specific host
|
||||
pub fn get_metric(&self, hostname: &str, metric_name: &str) -> Option<&Metric> {
|
||||
self.current_metrics.get(hostname)?.get(metric_name)
|
||||
|
||||
@@ -380,16 +380,18 @@ impl SystemWidget {
|
||||
}
|
||||
}
|
||||
|
||||
// Handle physical drive metrics: disk_{drive}_health and disk_{drive}_wear_percent
|
||||
// Handle physical drive metrics: disk_{drive}_health, disk_{drive}_wear_percent, and disk_{drive}_temperature
|
||||
if (metric_name.ends_with("_health") && !metric_name.contains("_pool_health"))
|
||||
|| metric_name.ends_with("_wear_percent") {
|
||||
|| metric_name.ends_with("_wear_percent")
|
||||
|| metric_name.ends_with("_temperature") {
|
||||
// Count underscores to distinguish physical drive metrics (disk_{drive}_metric)
|
||||
// from pool drive metrics (disk_{pool}_{drive}_metric)
|
||||
let underscore_count = metric_name.matches('_').count();
|
||||
// disk_nvme0n1_wear_percent has 3 underscores: disk_nvme0n1_wear_percent
|
||||
if underscore_count == 3 { // disk_{drive}_wear_percent (where drive has underscores)
|
||||
if underscore_count == 3 { // disk_{drive}_metric (where drive has underscores)
|
||||
if let Some(suffix_pos) = metric_name.rfind("_health")
|
||||
.or_else(|| metric_name.rfind("_wear_percent")) {
|
||||
.or_else(|| metric_name.rfind("_wear_percent"))
|
||||
.or_else(|| metric_name.rfind("_temperature")) {
|
||||
return Some(metric_name[5..suffix_pos].to_string()); // Skip "disk_"
|
||||
}
|
||||
}
|
||||
@@ -468,15 +470,25 @@ impl SystemWidget {
|
||||
for pool in &self.storage_pools {
|
||||
// Pool header line with type and health
|
||||
let pool_label = if pool.pool_type.starts_with("drive (") {
|
||||
// For physical drives, show the drive name with wear percentage if available
|
||||
// Look for any drive with wear data (physical drives may have drives named after the pool)
|
||||
// For physical drives, show the drive name with temperature and wear percentage if available
|
||||
// Look for any drive with temp/wear data (physical drives may have drives named after the pool)
|
||||
let temp_opt = pool.drives.iter()
|
||||
.find_map(|d| d.temperature);
|
||||
let wear_opt = pool.drives.iter()
|
||||
.find_map(|d| d.wear_percent);
|
||||
|
||||
let mut drive_info = Vec::new();
|
||||
if let Some(temp) = temp_opt {
|
||||
drive_info.push(format!("T: {:.0}°C", temp));
|
||||
}
|
||||
if let Some(wear) = wear_opt {
|
||||
format!("{} W: {:.0}%:", pool.name, wear)
|
||||
} else {
|
||||
drive_info.push(format!("W: {:.0}%", wear));
|
||||
}
|
||||
|
||||
if drive_info.is_empty() {
|
||||
format!("{}:", pool.name)
|
||||
} else {
|
||||
format!("{} {}:", pool.name, drive_info.join(" "))
|
||||
}
|
||||
} else if pool.pool_type == "single" {
|
||||
format!("{}:", pool.mount_point)
|
||||
|
||||
Reference in New Issue
Block a user