Compare commits

..

10 Commits

Author SHA1 Message Date
9a2df906ea Add ZMQ communication statistics tracking and display
All checks were successful
Build and Release / build-and-release (push) Successful in 1m10s
2025-11-27 16:14:45 +01:00
6d6beb207d Parse Docker image sizes to MB and sort services alphabetically
All checks were successful
Build and Release / build-and-release (push) Successful in 1m18s
2025-11-27 15:57:38 +01:00
7a68da01f5 Remove debug logging for NVMe SMART collection
All checks were successful
Build and Release / build-and-release (push) Successful in 1m9s
2025-11-27 15:40:16 +01:00
5be67fed64 Add debug logging for NVMe SMART data collection
All checks were successful
Build and Release / build-and-release (push) Successful in 1m19s
2025-11-27 15:00:48 +01:00
cac836601b Add NVMe device type flag for SMART data collection
All checks were successful
Build and Release / build-and-release (push) Successful in 1m19s
2025-11-27 13:34:30 +01:00
bd22ce265b Use direct smartctl with CAP_SYS_RAWIO instead of sudo
All checks were successful
Build and Release / build-and-release (push) Successful in 1m9s
2025-11-27 13:22:13 +01:00
bbc8b7b1cb Add info-level logging for SMART data collection debugging
All checks were successful
Build and Release / build-and-release (push) Successful in 1m19s
2025-11-27 13:15:53 +01:00
5dd8cadef3 Remove debug logging from Docker collection code
All checks were successful
Build and Release / build-and-release (push) Successful in 1m19s
2025-11-27 12:50:20 +01:00
fefe30ec51 Remove sudo from docker commands - use docker group membership instead
All checks were successful
Build and Release / build-and-release (push) Successful in 1m19s
Agent changes:
- Changed docker ps and docker images commands to run without sudo
- cm-agent user is already in docker group, so sudo is not needed
- Fixes "unable to change to root gid: Operation not permitted" error
- Systemd security restrictions were blocking sudo gid changes

This fixes Docker container and image collection on systems with
systemd security hardening enabled.

Updated to version 0.1.178
2025-11-27 12:35:38 +01:00
fb40cce748 Add stderr logging for Docker images command failure
All checks were successful
Build and Release / build-and-release (push) Successful in 1m9s
Agent changes:
- Log stderr output when docker images command fails
- This will show the actual error message (e.g., permission denied, docker not found)
- Helps diagnose why docker images collection is failing

Updated to version 0.1.177
2025-11-27 12:28:55 +01:00
10 changed files with 134 additions and 46 deletions

6
Cargo.lock generated
View File

@@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
[[package]] [[package]]
name = "cm-dashboard" name = "cm-dashboard"
version = "0.1.175" version = "0.1.185"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"chrono", "chrono",
@@ -301,7 +301,7 @@ dependencies = [
[[package]] [[package]]
name = "cm-dashboard-agent" name = "cm-dashboard-agent"
version = "0.1.175" version = "0.1.185"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-trait", "async-trait",
@@ -324,7 +324,7 @@ dependencies = [
[[package]] [[package]]
name = "cm-dashboard-shared" name = "cm-dashboard-shared"
version = "0.1.175" version = "0.1.185"
dependencies = [ dependencies = [
"chrono", "chrono",
"serde", "serde",

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "cm-dashboard-agent" name = "cm-dashboard-agent"
version = "0.1.176" version = "0.1.186"
edition = "2021" edition = "2021"
[dependencies] [dependencies]

View File

@@ -386,7 +386,7 @@ impl DiskCollector {
/// Get SMART data for drives /// Get SMART data for drives
async fn get_smart_data_for_drives(&self, physical_drives: &[PhysicalDrive], mergerfs_pools: &[MergerfsPool]) -> HashMap<String, SmartData> { async fn get_smart_data_for_drives(&self, physical_drives: &[PhysicalDrive], mergerfs_pools: &[MergerfsPool]) -> HashMap<String, SmartData> {
let mut smart_data = HashMap::new(); let mut smart_data = HashMap::new();
// Collect all drive names // Collect all drive names
let mut all_drives = std::collections::HashSet::new(); let mut all_drives = std::collections::HashSet::new();
for drive in physical_drives { for drive in physical_drives {
@@ -413,23 +413,24 @@ impl DiskCollector {
/// Get SMART data for a single drive /// Get SMART data for a single drive
async fn get_smart_data(&self, drive_name: &str) -> Result<SmartData, CollectorError> { async fn get_smart_data(&self, drive_name: &str) -> Result<SmartData, CollectorError> {
let output = Command::new("sudo") // Use direct smartctl (no sudo) - service has CAP_SYS_RAWIO and CAP_SYS_ADMIN capabilities
.args(&["smartctl", "-a", &format!("/dev/{}", drive_name)]) // For NVMe drives, specify device type explicitly
.output() let mut cmd = Command::new("smartctl");
if drive_name.starts_with("nvme") {
cmd.args(&["-d", "nvme", "-a", &format!("/dev/{}", drive_name)]);
} else {
cmd.args(&["-a", &format!("/dev/{}", drive_name)]);
}
let output = cmd.output()
.map_err(|e| CollectorError::SystemRead { .map_err(|e| CollectorError::SystemRead {
path: format!("SMART data for {}", drive_name), path: format!("SMART data for {}", drive_name),
error: e.to_string(), error: e.to_string(),
})?; })?;
let output_str = String::from_utf8_lossy(&output.stdout); let output_str = String::from_utf8_lossy(&output.stdout);
let error_str = String::from_utf8_lossy(&output.stderr);
// Debug logging for SMART command results
debug!("SMART output for {}: status={}, stdout_len={}, stderr={}",
drive_name, output.status, output_str.len(), error_str);
if !output.status.success() { if !output.status.success() {
debug!("SMART command failed for {}: {}", drive_name, error_str);
// Return unknown data rather than failing completely // Return unknown data rather than failing completely
return Ok(SmartData { return Ok(SmartData {
health: "UNKNOWN".to_string(), health: "UNKNOWN".to_string(),

View File

@@ -4,7 +4,7 @@ use cm_dashboard_shared::{AgentData, ServiceData, SubServiceData, SubServiceMetr
use std::process::Command; use std::process::Command;
use std::sync::RwLock; use std::sync::RwLock;
use std::time::Instant; use std::time::Instant;
use tracing::{debug, info}; use tracing::debug;
use super::{Collector, CollectorError}; use super::{Collector, CollectorError};
use crate::config::SystemdConfig; use crate::config::SystemdConfig;
@@ -118,10 +118,7 @@ impl SystemdCollector {
} }
if service_name.contains("docker") && active_status == "active" { if service_name.contains("docker") && active_status == "active" {
info!("Collecting Docker sub-services for service: {}", service_name);
let docker_containers = self.get_docker_containers(); let docker_containers = self.get_docker_containers();
info!("Found {} Docker containers", docker_containers.len());
for (container_name, container_status) in docker_containers { for (container_name, container_status) in docker_containers {
// For now, docker containers have no additional metrics // For now, docker containers have no additional metrics
// Future: could add memory_mb, cpu_percent, restart_count, etc. // Future: could add memory_mb, cpu_percent, restart_count, etc.
@@ -136,23 +133,20 @@ impl SystemdCollector {
// Add Docker images // Add Docker images
let docker_images = self.get_docker_images(); let docker_images = self.get_docker_images();
info!("Found {} Docker images", docker_images.len()); for (image_name, image_status, image_size_str, image_size_mb) in docker_images {
for (image_name, image_status, image_size) in docker_images {
let mut metrics = Vec::new(); let mut metrics = Vec::new();
metrics.push(SubServiceMetric { metrics.push(SubServiceMetric {
label: "size".to_string(), label: "size".to_string(),
value: 0.0, // Size as string in name instead value: image_size_mb,
unit: None, unit: Some("MB".to_string()),
}); });
sub_services.push(SubServiceData { sub_services.push(SubServiceData {
name: format!("{} ({})", image_name, image_size), name: format!("{} ({})", image_name, image_size_str),
service_status: self.calculate_service_status(&image_name, &image_status), service_status: self.calculate_service_status(&image_name, &image_status),
metrics, metrics,
}); });
} }
info!("Total Docker sub-services added: {}", sub_services.len());
} }
// Create complete service data // Create complete service data
@@ -175,6 +169,10 @@ impl SystemdCollector {
} }
} }
// Sort services alphabetically by name
agent_data.services.sort_by(|a, b| a.name.cmp(&b.name));
complete_service_data.sort_by(|a, b| a.name.cmp(&b.name));
// Update cached state // Update cached state
{ {
let mut state = self.state.write().unwrap(); let mut state = self.state.write().unwrap();
@@ -779,10 +777,10 @@ impl SystemdCollector {
fn get_docker_containers(&self) -> Vec<(String, String)> { fn get_docker_containers(&self) -> Vec<(String, String)> {
let mut containers = Vec::new(); let mut containers = Vec::new();
// Check if docker is available (use sudo for permissions) // Check if docker is available (cm-agent user is in docker group)
// Use -a to show ALL containers (running and stopped) // Use -a to show ALL containers (running and stopped)
let output = Command::new("sudo") let output = Command::new("docker")
.args(&["docker", "ps", "-a", "--format", "{{.Names}},{{.Status}}"]) .args(&["ps", "-a", "--format", "{{.Names}},{{.Status}}"])
.output(); .output();
let output = match output { let output = match output {
@@ -821,24 +819,19 @@ impl SystemdCollector {
} }
/// Get docker images as sub-services /// Get docker images as sub-services
fn get_docker_images(&self) -> Vec<(String, String, String)> { fn get_docker_images(&self) -> Vec<(String, String, String, f32)> {
let mut images = Vec::new(); let mut images = Vec::new();
// Check if docker is available (cm-agent user is in docker group)
info!("Collecting Docker images"); let output = Command::new("docker")
.args(&["images", "--format", "{{.Repository}}:{{.Tag}},{{.Size}}"])
// Check if docker is available (use sudo for permissions)
let output = Command::new("sudo")
.args(&["docker", "images", "--format", "{{.Repository}}:{{.Tag}},{{.Size}}"])
.output(); .output();
let output = match output { let output = match output {
Ok(out) if out.status.success() => out, Ok(out) if out.status.success() => out,
Ok(out) => { Ok(_) => {
info!("Docker images command failed with status: {}", out.status);
return images; return images;
} }
Err(e) => { Err(_) => {
info!("Docker images command error: {}", e);
return images; return images;
} }
}; };
@@ -856,23 +849,55 @@ impl SystemdCollector {
let parts: Vec<&str> = line.split(',').collect(); let parts: Vec<&str> = line.split(',').collect();
if parts.len() >= 2 { if parts.len() >= 2 {
let image_name = parts[0].trim(); let image_name = parts[0].trim();
let size = parts[1].trim(); let size_str = parts[1].trim();
// Skip <none>:<none> images (dangling images) // Skip <none>:<none> images (dangling images)
if image_name.contains("<none>") { if image_name.contains("<none>") {
continue; continue;
} }
// Parse size to MB (sizes come as "142MB", "1.5GB", "512kB", etc.)
let size_mb = self.parse_docker_size(size_str);
images.push(( images.push((
format!("image_{}", image_name), format!("image_{}", image_name),
"active".to_string(), // Images are always "active" (present) "active".to_string(), // Images are always "active" (present)
size.to_string() size_str.to_string(),
size_mb
)); ));
} }
} }
images images
} }
/// Parse Docker size string to MB
fn parse_docker_size(&self, size_str: &str) -> f32 {
let size_upper = size_str.to_uppercase();
// Extract numeric part and unit
let mut num_str = String::new();
let mut unit = String::new();
for ch in size_upper.chars() {
if ch.is_ascii_digit() || ch == '.' {
num_str.push(ch);
} else if ch.is_alphabetic() {
unit.push(ch);
}
}
let value: f32 = num_str.parse().unwrap_or(0.0);
// Convert to MB
match unit.as_str() {
"KB" | "K" => value / 1024.0,
"MB" | "M" => value,
"GB" | "G" => value * 1024.0,
"TB" | "T" => value * 1024.0 * 1024.0,
_ => value, // Assume bytes if no unit
}
}
} }
#[async_trait] #[async_trait]

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "cm-dashboard" name = "cm-dashboard"
version = "0.1.176" version = "0.1.186"
edition = "2021" edition = "2021"
[dependencies] [dependencies]

View File

@@ -215,7 +215,7 @@ impl Dashboard {
// Update TUI with new metrics (only if not headless) // Update TUI with new metrics (only if not headless)
if let Some(ref mut tui_app) = self.tui_app { if let Some(ref mut tui_app) = self.tui_app {
tui_app.update_metrics(&self.metric_store); tui_app.update_metrics(&mut self.metric_store);
} }
} }

View File

@@ -5,6 +5,14 @@ use tracing::{debug, info, warn};
use super::MetricDataPoint; use super::MetricDataPoint;
/// ZMQ communication statistics per host
#[derive(Debug, Clone)]
pub struct ZmqStats {
pub packets_received: u64,
pub last_packet_time: Instant,
pub last_packet_age_secs: f64,
}
/// Central metric storage for the dashboard /// Central metric storage for the dashboard
pub struct MetricStore { pub struct MetricStore {
/// Current structured data: hostname -> AgentData /// Current structured data: hostname -> AgentData
@@ -13,6 +21,8 @@ pub struct MetricStore {
historical_metrics: HashMap<String, Vec<MetricDataPoint>>, historical_metrics: HashMap<String, Vec<MetricDataPoint>>,
/// Last heartbeat timestamp per host /// Last heartbeat timestamp per host
last_heartbeat: HashMap<String, Instant>, last_heartbeat: HashMap<String, Instant>,
/// ZMQ communication statistics per host
zmq_stats: HashMap<String, ZmqStats>,
/// Configuration /// Configuration
max_metrics_per_host: usize, max_metrics_per_host: usize,
history_retention: Duration, history_retention: Duration,
@@ -24,6 +34,7 @@ impl MetricStore {
current_agent_data: HashMap::new(), current_agent_data: HashMap::new(),
historical_metrics: HashMap::new(), historical_metrics: HashMap::new(),
last_heartbeat: HashMap::new(), last_heartbeat: HashMap::new(),
zmq_stats: HashMap::new(),
max_metrics_per_host, max_metrics_per_host,
history_retention: Duration::from_secs(history_retention_hours * 3600), history_retention: Duration::from_secs(history_retention_hours * 3600),
} }
@@ -44,6 +55,16 @@ impl MetricStore {
self.last_heartbeat.insert(hostname.clone(), now); self.last_heartbeat.insert(hostname.clone(), now);
debug!("Updated heartbeat for host {}", hostname); debug!("Updated heartbeat for host {}", hostname);
// Update ZMQ stats
let stats = self.zmq_stats.entry(hostname.clone()).or_insert(ZmqStats {
packets_received: 0,
last_packet_time: now,
last_packet_age_secs: 0.0,
});
stats.packets_received += 1;
stats.last_packet_time = now;
stats.last_packet_age_secs = 0.0; // Just received
// Add to history // Add to history
let host_history = self let host_history = self
.historical_metrics .historical_metrics
@@ -65,6 +86,15 @@ impl MetricStore {
self.current_agent_data.get(hostname) self.current_agent_data.get(hostname)
} }
/// Get ZMQ communication statistics for a host
pub fn get_zmq_stats(&mut self, hostname: &str) -> Option<ZmqStats> {
let now = Instant::now();
self.zmq_stats.get_mut(hostname).map(|stats| {
// Update packet age
stats.last_packet_age_secs = now.duration_since(stats.last_packet_time).as_secs_f64();
stats.clone()
})
}
/// Get connected hosts (hosts with recent heartbeats) /// Get connected hosts (hosts with recent heartbeats)
pub fn get_connected_hosts(&self, timeout: Duration) -> Vec<String> { pub fn get_connected_hosts(&self, timeout: Duration) -> Vec<String> {

View File

@@ -100,7 +100,7 @@ impl TuiApp {
} }
/// Update widgets with structured data from store (only for current host) /// Update widgets with structured data from store (only for current host)
pub fn update_metrics(&mut self, metric_store: &MetricStore) { pub fn update_metrics(&mut self, metric_store: &mut MetricStore) {
if let Some(hostname) = self.current_host.clone() { if let Some(hostname) = self.current_host.clone() {
// Get structured data for this host // Get structured data for this host
if let Some(agent_data) = metric_store.get_agent_data(&hostname) { if let Some(agent_data) = metric_store.get_agent_data(&hostname) {
@@ -110,6 +110,14 @@ impl TuiApp {
host_widgets.system_widget.update_from_agent_data(agent_data); host_widgets.system_widget.update_from_agent_data(agent_data);
host_widgets.services_widget.update_from_agent_data(agent_data); host_widgets.services_widget.update_from_agent_data(agent_data);
// Update ZMQ stats
if let Some(zmq_stats) = metric_store.get_zmq_stats(&hostname) {
host_widgets.system_widget.update_zmq_stats(
zmq_stats.packets_received,
zmq_stats.last_packet_age_secs
);
}
host_widgets.last_update = Some(Instant::now()); host_widgets.last_update = Some(Instant::now());
} }
} }

View File

@@ -15,6 +15,10 @@ pub struct SystemWidget {
nixos_build: Option<String>, nixos_build: Option<String>,
agent_hash: Option<String>, agent_hash: Option<String>,
// ZMQ communication stats
zmq_packets_received: Option<u64>,
zmq_last_packet_age: Option<f64>,
// Network interfaces // Network interfaces
network_interfaces: Vec<cm_dashboard_shared::NetworkInterfaceData>, network_interfaces: Vec<cm_dashboard_shared::NetworkInterfaceData>,
@@ -92,6 +96,8 @@ impl SystemWidget {
Self { Self {
nixos_build: None, nixos_build: None,
agent_hash: None, agent_hash: None,
zmq_packets_received: None,
zmq_last_packet_age: None,
network_interfaces: Vec::new(), network_interfaces: Vec::new(),
cpu_load_1min: None, cpu_load_1min: None,
cpu_load_5min: None, cpu_load_5min: None,
@@ -154,6 +160,12 @@ impl SystemWidget {
pub fn _get_agent_hash(&self) -> Option<&String> { pub fn _get_agent_hash(&self) -> Option<&String> {
self.agent_hash.as_ref() self.agent_hash.as_ref()
} }
/// Update ZMQ communication statistics
pub fn update_zmq_stats(&mut self, packets_received: u64, last_packet_age_secs: f64) {
self.zmq_packets_received = Some(packets_received);
self.zmq_last_packet_age = Some(last_packet_age_secs);
}
} }
use super::Widget; use super::Widget;
@@ -796,6 +808,18 @@ impl SystemWidget {
Span::styled(format!("Agent: {}", agent_version_text), Typography::secondary()) Span::styled(format!("Agent: {}", agent_version_text), Typography::secondary())
])); ]));
// ZMQ communication stats
if let (Some(packets), Some(age)) = (self.zmq_packets_received, self.zmq_last_packet_age) {
let age_text = if age < 1.0 {
format!("{:.0}ms ago", age * 1000.0)
} else {
format!("{:.1}s ago", age)
};
lines.push(Line::from(vec![
Span::styled(format!("ZMQ: {} pkts, last {}", packets, age_text), Typography::secondary())
]));
}
// CPU section // CPU section
lines.push(Line::from(vec![ lines.push(Line::from(vec![
Span::styled("CPU:", Typography::widget_title()) Span::styled("CPU:", Typography::widget_title())

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "cm-dashboard-shared" name = "cm-dashboard-shared"
version = "0.1.176" version = "0.1.186"
edition = "2021" edition = "2021"
[dependencies] [dependencies]