Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 9a2df906ea | |||
| 6d6beb207d | |||
| 7a68da01f5 | |||
| 5be67fed64 | |||
| cac836601b | |||
| bd22ce265b | |||
| bbc8b7b1cb | |||
| 5dd8cadef3 | |||
| fefe30ec51 | |||
| fb40cce748 | |||
| eaa057b284 | |||
| f23a1b5cec |
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.174"
|
||||
version = "0.1.185"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@@ -301,7 +301,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.174"
|
||||
version = "0.1.185"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -324,7 +324,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.174"
|
||||
version = "0.1.185"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"serde",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.174"
|
||||
version = "0.1.186"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -386,7 +386,7 @@ impl DiskCollector {
|
||||
/// Get SMART data for drives
|
||||
async fn get_smart_data_for_drives(&self, physical_drives: &[PhysicalDrive], mergerfs_pools: &[MergerfsPool]) -> HashMap<String, SmartData> {
|
||||
let mut smart_data = HashMap::new();
|
||||
|
||||
|
||||
// Collect all drive names
|
||||
let mut all_drives = std::collections::HashSet::new();
|
||||
for drive in physical_drives {
|
||||
@@ -413,23 +413,24 @@ impl DiskCollector {
|
||||
|
||||
/// Get SMART data for a single drive
|
||||
async fn get_smart_data(&self, drive_name: &str) -> Result<SmartData, CollectorError> {
|
||||
let output = Command::new("sudo")
|
||||
.args(&["smartctl", "-a", &format!("/dev/{}", drive_name)])
|
||||
.output()
|
||||
// Use direct smartctl (no sudo) - service has CAP_SYS_RAWIO and CAP_SYS_ADMIN capabilities
|
||||
// For NVMe drives, specify device type explicitly
|
||||
let mut cmd = Command::new("smartctl");
|
||||
if drive_name.starts_with("nvme") {
|
||||
cmd.args(&["-d", "nvme", "-a", &format!("/dev/{}", drive_name)]);
|
||||
} else {
|
||||
cmd.args(&["-a", &format!("/dev/{}", drive_name)]);
|
||||
}
|
||||
|
||||
let output = cmd.output()
|
||||
.map_err(|e| CollectorError::SystemRead {
|
||||
path: format!("SMART data for {}", drive_name),
|
||||
error: e.to_string(),
|
||||
})?;
|
||||
|
||||
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||
let error_str = String::from_utf8_lossy(&output.stderr);
|
||||
|
||||
// Debug logging for SMART command results
|
||||
debug!("SMART output for {}: status={}, stdout_len={}, stderr={}",
|
||||
drive_name, output.status, output_str.len(), error_str);
|
||||
|
||||
|
||||
if !output.status.success() {
|
||||
debug!("SMART command failed for {}: {}", drive_name, error_str);
|
||||
// Return unknown data rather than failing completely
|
||||
return Ok(SmartData {
|
||||
health: "UNKNOWN".to_string(),
|
||||
|
||||
@@ -133,16 +133,16 @@ impl SystemdCollector {
|
||||
|
||||
// Add Docker images
|
||||
let docker_images = self.get_docker_images();
|
||||
for (image_name, image_status, image_size) in docker_images {
|
||||
for (image_name, image_status, image_size_str, image_size_mb) in docker_images {
|
||||
let mut metrics = Vec::new();
|
||||
metrics.push(SubServiceMetric {
|
||||
label: "size".to_string(),
|
||||
value: 0.0, // Size as string in name instead
|
||||
unit: None,
|
||||
value: image_size_mb,
|
||||
unit: Some("MB".to_string()),
|
||||
});
|
||||
|
||||
sub_services.push(SubServiceData {
|
||||
name: format!("{} ({})", image_name, image_size),
|
||||
name: format!("{} ({})", image_name, image_size_str),
|
||||
service_status: self.calculate_service_status(&image_name, &image_status),
|
||||
metrics,
|
||||
});
|
||||
@@ -169,6 +169,10 @@ impl SystemdCollector {
|
||||
}
|
||||
}
|
||||
|
||||
// Sort services alphabetically by name
|
||||
agent_data.services.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
complete_service_data.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
|
||||
// Update cached state
|
||||
{
|
||||
let mut state = self.state.write().unwrap();
|
||||
@@ -773,10 +777,10 @@ impl SystemdCollector {
|
||||
fn get_docker_containers(&self) -> Vec<(String, String)> {
|
||||
let mut containers = Vec::new();
|
||||
|
||||
// Check if docker is available (use sudo for permissions)
|
||||
// Check if docker is available (cm-agent user is in docker group)
|
||||
// Use -a to show ALL containers (running and stopped)
|
||||
let output = Command::new("sudo")
|
||||
.args(&["docker", "ps", "-a", "--format", "{{.Names}},{{.Status}}"])
|
||||
let output = Command::new("docker")
|
||||
.args(&["ps", "-a", "--format", "{{.Names}},{{.Status}}"])
|
||||
.output();
|
||||
|
||||
let output = match output {
|
||||
@@ -815,17 +819,21 @@ impl SystemdCollector {
|
||||
}
|
||||
|
||||
/// Get docker images as sub-services
|
||||
fn get_docker_images(&self) -> Vec<(String, String, String)> {
|
||||
fn get_docker_images(&self) -> Vec<(String, String, String, f32)> {
|
||||
let mut images = Vec::new();
|
||||
|
||||
// Check if docker is available (use sudo for permissions)
|
||||
let output = Command::new("sudo")
|
||||
.args(&["docker", "images", "--format", "{{.Repository}}:{{.Tag}},{{.Size}}"])
|
||||
// Check if docker is available (cm-agent user is in docker group)
|
||||
let output = Command::new("docker")
|
||||
.args(&["images", "--format", "{{.Repository}}:{{.Tag}},{{.Size}}"])
|
||||
.output();
|
||||
|
||||
let output = match output {
|
||||
Ok(out) if out.status.success() => out,
|
||||
_ => return images, // Docker not available or failed
|
||||
Ok(_) => {
|
||||
return images;
|
||||
}
|
||||
Err(_) => {
|
||||
return images;
|
||||
}
|
||||
};
|
||||
|
||||
let output_str = match String::from_utf8(output.stdout) {
|
||||
@@ -841,23 +849,55 @@ impl SystemdCollector {
|
||||
let parts: Vec<&str> = line.split(',').collect();
|
||||
if parts.len() >= 2 {
|
||||
let image_name = parts[0].trim();
|
||||
let size = parts[1].trim();
|
||||
let size_str = parts[1].trim();
|
||||
|
||||
// Skip <none>:<none> images (dangling images)
|
||||
if image_name.contains("<none>") {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Parse size to MB (sizes come as "142MB", "1.5GB", "512kB", etc.)
|
||||
let size_mb = self.parse_docker_size(size_str);
|
||||
|
||||
images.push((
|
||||
format!("image_{}", image_name),
|
||||
"active".to_string(), // Images are always "active" (present)
|
||||
size.to_string()
|
||||
size_str.to_string(),
|
||||
size_mb
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
images
|
||||
}
|
||||
|
||||
/// Parse Docker size string to MB
|
||||
fn parse_docker_size(&self, size_str: &str) -> f32 {
|
||||
let size_upper = size_str.to_uppercase();
|
||||
|
||||
// Extract numeric part and unit
|
||||
let mut num_str = String::new();
|
||||
let mut unit = String::new();
|
||||
|
||||
for ch in size_upper.chars() {
|
||||
if ch.is_ascii_digit() || ch == '.' {
|
||||
num_str.push(ch);
|
||||
} else if ch.is_alphabetic() {
|
||||
unit.push(ch);
|
||||
}
|
||||
}
|
||||
|
||||
let value: f32 = num_str.parse().unwrap_or(0.0);
|
||||
|
||||
// Convert to MB
|
||||
match unit.as_str() {
|
||||
"KB" | "K" => value / 1024.0,
|
||||
"MB" | "M" => value,
|
||||
"GB" | "G" => value * 1024.0,
|
||||
"TB" | "T" => value * 1024.0 * 1024.0,
|
||||
_ => value, // Assume bytes if no unit
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.174"
|
||||
version = "0.1.186"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -215,7 +215,7 @@ impl Dashboard {
|
||||
|
||||
// Update TUI with new metrics (only if not headless)
|
||||
if let Some(ref mut tui_app) = self.tui_app {
|
||||
tui_app.update_metrics(&self.metric_store);
|
||||
tui_app.update_metrics(&mut self.metric_store);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,14 @@ use tracing::{debug, info, warn};
|
||||
|
||||
use super::MetricDataPoint;
|
||||
|
||||
/// Per-host bookkeeping for ZMQ traffic received by the dashboard.
#[derive(Debug, Clone)]
pub struct ZmqStats {
    /// Running count of packets received; bumped by one on every update.
    pub packets_received: u64,
    /// Instant at which the most recent packet was recorded.
    pub last_packet_time: Instant,
    /// Seconds elapsed since `last_packet_time`. Reset to `0.0` when a packet
    /// arrives and recomputed each time the stats are queried.
    pub last_packet_age_secs: f64,
}
|
||||
|
||||
/// Central metric storage for the dashboard
|
||||
pub struct MetricStore {
|
||||
/// Current structured data: hostname -> AgentData
|
||||
@@ -13,6 +21,8 @@ pub struct MetricStore {
|
||||
historical_metrics: HashMap<String, Vec<MetricDataPoint>>,
|
||||
/// Last heartbeat timestamp per host
|
||||
last_heartbeat: HashMap<String, Instant>,
|
||||
/// ZMQ communication statistics per host
|
||||
zmq_stats: HashMap<String, ZmqStats>,
|
||||
/// Configuration
|
||||
max_metrics_per_host: usize,
|
||||
history_retention: Duration,
|
||||
@@ -24,6 +34,7 @@ impl MetricStore {
|
||||
current_agent_data: HashMap::new(),
|
||||
historical_metrics: HashMap::new(),
|
||||
last_heartbeat: HashMap::new(),
|
||||
zmq_stats: HashMap::new(),
|
||||
max_metrics_per_host,
|
||||
history_retention: Duration::from_secs(history_retention_hours * 3600),
|
||||
}
|
||||
@@ -44,6 +55,16 @@ impl MetricStore {
|
||||
self.last_heartbeat.insert(hostname.clone(), now);
|
||||
debug!("Updated heartbeat for host {}", hostname);
|
||||
|
||||
// Update ZMQ stats
|
||||
let stats = self.zmq_stats.entry(hostname.clone()).or_insert(ZmqStats {
|
||||
packets_received: 0,
|
||||
last_packet_time: now,
|
||||
last_packet_age_secs: 0.0,
|
||||
});
|
||||
stats.packets_received += 1;
|
||||
stats.last_packet_time = now;
|
||||
stats.last_packet_age_secs = 0.0; // Just received
|
||||
|
||||
// Add to history
|
||||
let host_history = self
|
||||
.historical_metrics
|
||||
@@ -65,6 +86,15 @@ impl MetricStore {
|
||||
self.current_agent_data.get(hostname)
|
||||
}
|
||||
|
||||
/// Get ZMQ communication statistics for a host
|
||||
pub fn get_zmq_stats(&mut self, hostname: &str) -> Option<ZmqStats> {
|
||||
let now = Instant::now();
|
||||
self.zmq_stats.get_mut(hostname).map(|stats| {
|
||||
// Update packet age
|
||||
stats.last_packet_age_secs = now.duration_since(stats.last_packet_time).as_secs_f64();
|
||||
stats.clone()
|
||||
})
|
||||
}
|
||||
|
||||
/// Get connected hosts (hosts with recent heartbeats)
|
||||
pub fn get_connected_hosts(&self, timeout: Duration) -> Vec<String> {
|
||||
|
||||
@@ -100,7 +100,7 @@ impl TuiApp {
|
||||
}
|
||||
|
||||
/// Update widgets with structured data from store (only for current host)
|
||||
pub fn update_metrics(&mut self, metric_store: &MetricStore) {
|
||||
pub fn update_metrics(&mut self, metric_store: &mut MetricStore) {
|
||||
if let Some(hostname) = self.current_host.clone() {
|
||||
// Get structured data for this host
|
||||
if let Some(agent_data) = metric_store.get_agent_data(&hostname) {
|
||||
@@ -110,6 +110,14 @@ impl TuiApp {
|
||||
host_widgets.system_widget.update_from_agent_data(agent_data);
|
||||
host_widgets.services_widget.update_from_agent_data(agent_data);
|
||||
|
||||
// Update ZMQ stats
|
||||
if let Some(zmq_stats) = metric_store.get_zmq_stats(&hostname) {
|
||||
host_widgets.system_widget.update_zmq_stats(
|
||||
zmq_stats.packets_received,
|
||||
zmq_stats.last_packet_age_secs
|
||||
);
|
||||
}
|
||||
|
||||
host_widgets.last_update = Some(Instant::now());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,6 +15,10 @@ pub struct SystemWidget {
|
||||
nixos_build: Option<String>,
|
||||
agent_hash: Option<String>,
|
||||
|
||||
// ZMQ communication stats
|
||||
zmq_packets_received: Option<u64>,
|
||||
zmq_last_packet_age: Option<f64>,
|
||||
|
||||
// Network interfaces
|
||||
network_interfaces: Vec<cm_dashboard_shared::NetworkInterfaceData>,
|
||||
|
||||
@@ -92,6 +96,8 @@ impl SystemWidget {
|
||||
Self {
|
||||
nixos_build: None,
|
||||
agent_hash: None,
|
||||
zmq_packets_received: None,
|
||||
zmq_last_packet_age: None,
|
||||
network_interfaces: Vec::new(),
|
||||
cpu_load_1min: None,
|
||||
cpu_load_5min: None,
|
||||
@@ -154,6 +160,12 @@ impl SystemWidget {
|
||||
pub fn _get_agent_hash(&self) -> Option<&String> {
|
||||
self.agent_hash.as_ref()
|
||||
}
|
||||
|
||||
/// Update ZMQ communication statistics
|
||||
pub fn update_zmq_stats(&mut self, packets_received: u64, last_packet_age_secs: f64) {
|
||||
self.zmq_packets_received = Some(packets_received);
|
||||
self.zmq_last_packet_age = Some(last_packet_age_secs);
|
||||
}
|
||||
}
|
||||
|
||||
use super::Widget;
|
||||
@@ -796,6 +808,18 @@ impl SystemWidget {
|
||||
Span::styled(format!("Agent: {}", agent_version_text), Typography::secondary())
|
||||
]));
|
||||
|
||||
// ZMQ communication stats
|
||||
if let (Some(packets), Some(age)) = (self.zmq_packets_received, self.zmq_last_packet_age) {
|
||||
let age_text = if age < 1.0 {
|
||||
format!("{:.0}ms ago", age * 1000.0)
|
||||
} else {
|
||||
format!("{:.1}s ago", age)
|
||||
};
|
||||
lines.push(Line::from(vec![
|
||||
Span::styled(format!("ZMQ: {} pkts, last {}", packets, age_text), Typography::secondary())
|
||||
]));
|
||||
}
|
||||
|
||||
// CPU section
|
||||
lines.push(Line::from(vec![
|
||||
Span::styled("CPU:", Typography::widget_title())
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.174"
|
||||
version = "0.1.186"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
Reference in New Issue
Block a user