diff --git a/Cargo.lock b/Cargo.lock
index f6afd02..637caff 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
 
 [[package]]
 name = "cm-dashboard"
-version = "0.1.204"
+version = "0.1.205"
 dependencies = [
  "anyhow",
  "chrono",
@@ -301,7 +301,7 @@ dependencies = [
 
 [[package]]
 name = "cm-dashboard-agent"
-version = "0.1.204"
+version = "0.1.205"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -324,7 +324,7 @@ dependencies = [
 
 [[package]]
 name = "cm-dashboard-shared"
-version = "0.1.204"
+version = "0.1.205"
 dependencies = [
  "chrono",
  "serde",
diff --git a/agent/src/collectors/systemd.rs b/agent/src/collectors/systemd.rs
index 170d8b0..bff99a1 100644
--- a/agent/src/collectors/systemd.rs
+++ b/agent/src/collectors/systemd.rs
@@ -43,9 +43,10 @@ struct ServiceCacheState {
 /// Cached service status information from systemctl list-units
 #[derive(Debug, Clone)]
 struct ServiceStatusInfo {
-    load_state: String,
     active_state: String,
-    sub_state: String,
+    memory_bytes: Option<u64>,
+    restart_count: Option<u32>,
+    start_timestamp: Option<u64>,
 }
 
 impl SystemdCollector {
@@ -86,11 +87,20 @@ impl SystemdCollector {
         let mut complete_service_data = Vec::new();
         for service_name in &monitored_services {
             match self.get_service_status(service_name) {
-                Ok((active_status, _detailed_info)) => {
+                Ok(status_info) => {
                     let mut sub_services = Vec::new();
 
+                    // Calculate uptime if we have start timestamp
+                    let uptime_seconds = status_info.start_timestamp.and_then(|start| {
+                        let now = std::time::SystemTime::now()
+                            .duration_since(std::time::UNIX_EPOCH)
+                            .ok()?
+                            .as_secs();
+                        Some(now.saturating_sub(start))
+                    });
+
                     // Sub-service metrics for specific services (always include cached results)
-                    if service_name.contains("nginx") && active_status == "active" {
+                    if service_name.contains("nginx") && status_info.active_state == "active" {
                         let nginx_sites = self.get_nginx_site_metrics();
                         for (site_name, latency_ms) in nginx_sites {
                             let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms {
@@ -115,7 +125,7 @@ impl SystemdCollector {
                         }
                     }
 
-                    if service_name.contains("docker") && active_status == "active" {
+                    if service_name.contains("docker") && status_info.active_state == "active" {
                         let docker_containers = self.get_docker_containers();
                         for (container_name, container_status) in docker_containers {
                             // For now, docker containers have no additional metrics
@@ -153,8 +163,11 @@ impl SystemdCollector {
                     let service_data = ServiceData {
                         name: service_name.clone(),
                         user_stopped: false, // TODO: Integrate with service tracker
-                        service_status: self.calculate_service_status(service_name, &active_status),
+                        service_status: self.calculate_service_status(service_name, &status_info.active_state),
                         sub_services,
+                        memory_bytes: status_info.memory_bytes,
+                        restart_count: status_info.restart_count,
+                        uptime_seconds,
                     };
 
                     // Add to AgentData and cache
@@ -290,14 +303,13 @@ impl SystemdCollector {
             let fields: Vec<&str> = line.split_whitespace().collect();
             if fields.len() >= 4 && fields[0].ends_with(".service") {
                 let service_name = fields[0].trim_end_matches(".service");
-                let load_state = fields.get(1).unwrap_or(&"unknown").to_string();
                 let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
-                let sub_state = fields.get(3).unwrap_or(&"unknown").to_string();
 
                 status_cache.insert(service_name.to_string(), ServiceStatusInfo {
-                    load_state,
                     active_state,
-                    sub_state,
+                    memory_bytes: None,
+                    restart_count: None,
+                    start_timestamp: None,
                });
            }
        }
@@ -306,9 +318,10 @@ impl SystemdCollector {
         for service_name in &all_service_names {
             if !status_cache.contains_key(service_name) {
                 status_cache.insert(service_name.to_string(), ServiceStatusInfo {
-                    load_state: "not-loaded".to_string(),
                     active_state: "inactive".to_string(),
-                    sub_state: "dead".to_string(),
+                    memory_bytes: None,
+                    restart_count: None,
+                    start_timestamp: None,
                 });
             }
         }
@@ -341,35 +354,63 @@ impl SystemdCollector {
     }
 
     /// Get service status from cache (if available) or fallback to systemctl
-    fn get_service_status(&self, service: &str) -> Result<(String, String)> {
+    fn get_service_status(&self, service: &str) -> Result<ServiceStatusInfo> {
         // Try to get status from cache first
         if let Ok(state) = self.state.read() {
             if let Some(cached_info) = state.service_status_cache.get(service) {
-                let active_status = cached_info.active_state.clone();
-                let detailed_info = format!(
-                    "LoadState={}\nActiveState={}\nSubState={}",
-                    cached_info.load_state,
-                    cached_info.active_state,
-                    cached_info.sub_state
-                );
-                return Ok((active_status, detailed_info));
+                return Ok(cached_info.clone());
             }
         }
 
         // Fallback to systemctl if not in cache (with 2 second timeout)
         let output = Command::new("timeout")
-            .args(&["2", "systemctl", "is-active", &format!("{}.service", service)])
+            .args(&[
+                "2",
+                "systemctl",
+                "show",
+                &format!("{}.service", service),
+                "--property=LoadState,ActiveState,SubState,MemoryCurrent,NRestarts,ExecMainStartTimestamp"
+            ])
            .output()?;
 
-        let active_status = String::from_utf8(output.stdout)?.trim().to_string();
+        let output_str = String::from_utf8(output.stdout)?;
 
-        // Get more detailed info (with 2 second timeout)
-        let output = Command::new("timeout")
-            .args(&["2", "systemctl", "show", &format!("{}.service", service), "--property=LoadState,ActiveState,SubState"])
-            .output()?;
+        // Parse properties
+        let mut active_state = String::new();
+        let mut memory_bytes = None;
+        let mut restart_count = None;
+        let mut start_timestamp = None;
 
-        let detailed_info = String::from_utf8(output.stdout)?;
-        Ok((active_status, detailed_info))
+        for line in output_str.lines() {
+            if let Some(value) = line.strip_prefix("ActiveState=") {
+                active_state = value.to_string();
+            } else if let Some(value) = line.strip_prefix("MemoryCurrent=") {
+                if value != "[not set]" {
+                    memory_bytes = value.parse().ok();
+                }
+            } else if let Some(value) = line.strip_prefix("NRestarts=") {
+                restart_count = value.parse().ok();
+            } else if let Some(value) = line.strip_prefix("ExecMainStartTimestamp=") {
+                if value != "[not set]" && !value.is_empty() {
+                    // Parse timestamp to seconds since epoch
+                    if let Ok(output) = Command::new("date")
+                        .args(&["+%s", "-d", value])
+                        .output()
+                    {
+                        if let Ok(timestamp_str) = String::from_utf8(output.stdout) {
+                            start_timestamp = timestamp_str.trim().parse().ok();
+                        }
+                    }
+                }
+            }
+        }
+
+        Ok(ServiceStatusInfo {
+            active_state,
+            memory_bytes,
+            restart_count,
+            start_timestamp,
+        })
     }
 
     /// Check if service name matches pattern (supports wildcards like nginx*)
diff --git a/dashboard/src/ui/widgets/services.rs b/dashboard/src/ui/widgets/services.rs
index 8bd070c..604bd0a 100644
--- a/dashboard/src/ui/widgets/services.rs
+++ b/dashboard/src/ui/widgets/services.rs
@@ -31,6 +31,9 @@ struct ServiceInfo {
     metrics: Vec<(String, f32, Option<String>)>, // (label, value, unit)
     widget_status: Status,
     service_type: String, // "nginx_site", "container", "image", or empty for parent services
+    memory_bytes: Option<u64>,
+    restart_count: Option<u32>,
+    uptime_seconds: Option<u64>,
 }
 
 impl ServicesWidget {
@@ -84,7 +87,7 @@ impl ServicesWidget {
         // Convert Status enum to display text
         let status_str = match info.widget_status {
             Status::Ok => "active",
-            Status::Inactive => "inactive", 
+            Status::Inactive => "inactive",
             Status::Critical => "failed",
             Status::Pending => "pending",
             Status::Warning => "warning",
@@ -92,9 +95,43 @@ impl ServicesWidget {
             Status::Offline => "offline",
         };
 
+        // Format memory
+        let memory_str = info.memory_bytes.map_or("-".to_string(), |bytes| {
+            let mb = bytes as f64 / (1024.0 * 1024.0);
+            if mb >= 1000.0 {
+                format!("{:.1}G", mb / 1024.0)
+            } else {
+                format!("{:.0}M", mb)
+            }
+        });
+
+        // Format uptime
+        let uptime_str = info.uptime_seconds.map_or("-".to_string(), |secs| {
+            let days = secs / 86400;
+            let hours = (secs % 86400) / 3600;
+            let mins = (secs % 3600) / 60;
+
+            if days > 0 {
+                format!("{}d{}h", days, hours)
+            } else if hours > 0 {
+                format!("{}h{}m", hours, mins)
+            } else {
+                format!("{}m", mins)
+            }
+        });
+
+        // Format restarts (show "!" if > 0 to indicate instability)
+        let restart_str = info.restart_count.map_or("-".to_string(), |count| {
+            if count > 0 {
+                format!("!{}", count)
+            } else {
+                "0".to_string()
+            }
+        });
+
         format!(
-            "{:<23} {:<10}",
-            short_name, status_str
+            "{:<23} {:<10} {:<8} {:<8} {:<5}",
+            short_name, status_str, memory_str, uptime_str, restart_str
         )
     }
 
@@ -280,6 +317,9 @@ impl Widget for ServicesWidget {
                 metrics: Vec::new(), // Parent services don't have custom metrics
                 widget_status: service.service_status,
                 service_type: String::new(), // Parent services have no type
+                memory_bytes: service.memory_bytes,
+                restart_count: service.restart_count,
+                uptime_seconds: service.uptime_seconds,
             };
 
             self.parent_services.insert(service.name.clone(), parent_info);
@@ -296,6 +336,9 @@ impl Widget for ServicesWidget {
                     metrics,
                     widget_status: sub_service.service_status,
                     service_type: sub_service.service_type.clone(),
+                    memory_bytes: None, // Sub-services don't have individual metrics yet
+                    restart_count: None,
+                    uptime_seconds: None,
                 };
                 sub_list.push((sub_service.name.clone(), sub_info));
             }
@@ -338,6 +381,9 @@ impl ServicesWidget {
                 metrics: Vec::new(),
                 widget_status: Status::Unknown,
                 service_type: String::new(),
+                memory_bytes: None,
+                restart_count: None,
+                uptime_seconds: None,
             });
 
         if metric.name.ends_with("_status") {
@@ -364,6 +410,9 @@ impl ServicesWidget {
                     metrics: Vec::new(),
                     widget_status: Status::Unknown,
                     service_type: String::new(), // Unknown type in legacy path
+                    memory_bytes: None,
+                    restart_count: None,
+                    uptime_seconds: None,
                 },
             ));
             &mut sub_service_list.last_mut().unwrap().1
@@ -429,8 +478,8 @@ impl ServicesWidget {
 
         // Header
         let header = format!(
-            "{:<25} {:<10}",
-            "Service:", "Status:"
+            "{:<25} {:<10} {:<8} {:<8} {:<5}",
+            "Service:", "Status:", "RAM:", "Uptime:", "↻:"
         );
         let header_para = Paragraph::new(header).style(Typography::muted());
         frame.render_widget(header_para, content_chunks[0]);
diff --git a/shared/src/agent_data.rs b/shared/src/agent_data.rs
index 17e123b..82c7d03 100644
--- a/shared/src/agent_data.rs
+++ b/shared/src/agent_data.rs
@@ -139,6 +139,12 @@ pub struct ServiceData {
     pub user_stopped: bool,
     pub service_status: Status,
     pub sub_services: Vec<SubServiceData>,
+    /// Memory usage in bytes (from MemoryCurrent)
+    pub memory_bytes: Option<u64>,
+    /// Number of service restarts (from NRestarts)
+    pub restart_count: Option<u32>,
+    /// Uptime in seconds (calculated from ExecMainStartTimestamp)
+    pub uptime_seconds: Option<u64>,
 }
 
 /// Sub-service data (nginx sites, docker containers, etc.)
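
For reviewers, a standalone sketch (not part of the patch) of how the new RAM and Uptime columns render, mirroring the formatting rules added in dashboard/src/ui/widgets/services.rs; the service names and values below are made up for illustration only.

// Illustrative only: reproduces the memory/uptime formatting from the patch
// with invented example values, to show the resulting column text.
fn format_memory(bytes: Option<u64>) -> String {
    bytes.map_or("-".to_string(), |bytes| {
        let mb = bytes as f64 / (1024.0 * 1024.0);
        if mb >= 1000.0 {
            format!("{:.1}G", mb / 1024.0)
        } else {
            format!("{:.0}M", mb)
        }
    })
}

fn format_uptime(secs: Option<u64>) -> String {
    secs.map_or("-".to_string(), |secs| {
        let days = secs / 86400;
        let hours = (secs % 86400) / 3600;
        let mins = (secs % 3600) / 60;
        if days > 0 {
            format!("{}d{}h", days, hours)
        } else if hours > 0 {
            format!("{}h{}m", hours, mins)
        } else {
            format!("{}m", mins)
        }
    })
}

fn main() {
    // Hypothetical row: a service using ~38 MiB, up 3 days 4 hours, 2 restarts.
    println!(
        "{:<23} {:<10} {:<8} {:<8} {:<5}",
        "nginx", "active", format_memory(Some(40_000_000)), format_uptime(Some(273_600)), "!2"
    );
    // Hypothetical row: a stopped service with no data reported.
    println!(
        "{:<23} {:<10} {:<8} {:<8} {:<5}",
        "postgresql", "inactive", format_memory(None), format_uptime(None), "-"
    );
}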