Add service metrics from systemctl (memory, uptime, restarts)
Shared:
- Add memory_bytes, restart_count, uptime_seconds to ServiceData

Agent:
- Add new fields to ServiceStatusInfo struct
- Fetch MemoryCurrent, NRestarts, ExecMainStartTimestamp from systemctl show
- Calculate uptime from start timestamp
- Parse and populate new fields in ServiceData
- Remove unused load_state and sub_state fields

Dashboard:
- Add memory_bytes, restart_count, uptime_seconds to ServiceInfo
- Update header: Service, Status, RAM, Uptime, ↻ (restarts)
- Format memory as MB/GB
- Format uptime as Xd Xh, Xh Xm, or Xm
- Show restart count with ! prefix if > 0 to indicate instability

All metrics obtained from single systemctl show call - zero overhead.
This commit is contained in:
parent
c3c9507a42
commit
0e01813ff5
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.204"
|
||||
version = "0.1.205"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@ -301,7 +301,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.204"
|
||||
version = "0.1.205"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@ -324,7 +324,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.204"
|
||||
version = "0.1.205"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"serde",
|
||||
|
||||
@ -43,9 +43,10 @@ struct ServiceCacheState {
|
||||
/// Cached service status information from systemctl list-units
|
||||
#[derive(Debug, Clone)]
|
||||
struct ServiceStatusInfo {
|
||||
load_state: String,
|
||||
active_state: String,
|
||||
sub_state: String,
|
||||
memory_bytes: Option<u64>,
|
||||
restart_count: Option<u32>,
|
||||
start_timestamp: Option<u64>,
|
||||
}
|
||||
|
||||
impl SystemdCollector {
|
||||
@ -86,11 +87,20 @@ impl SystemdCollector {
|
||||
let mut complete_service_data = Vec::new();
|
||||
for service_name in &monitored_services {
|
||||
match self.get_service_status(service_name) {
|
||||
Ok((active_status, _detailed_info)) => {
|
||||
Ok(status_info) => {
|
||||
let mut sub_services = Vec::new();
|
||||
|
||||
// Calculate uptime if we have start timestamp
|
||||
let uptime_seconds = status_info.start_timestamp.and_then(|start| {
|
||||
let now = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.ok()?
|
||||
.as_secs();
|
||||
Some(now.saturating_sub(start))
|
||||
});
|
||||
|
||||
// Sub-service metrics for specific services (always include cached results)
|
||||
if service_name.contains("nginx") && active_status == "active" {
|
||||
if service_name.contains("nginx") && status_info.active_state == "active" {
|
||||
let nginx_sites = self.get_nginx_site_metrics();
|
||||
for (site_name, latency_ms) in nginx_sites {
|
||||
let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms {
|
||||
@ -115,7 +125,7 @@ impl SystemdCollector {
|
||||
}
|
||||
}
|
||||
|
||||
if service_name.contains("docker") && active_status == "active" {
|
||||
if service_name.contains("docker") && status_info.active_state == "active" {
|
||||
let docker_containers = self.get_docker_containers();
|
||||
for (container_name, container_status) in docker_containers {
|
||||
// For now, docker containers have no additional metrics
|
||||
@ -153,8 +163,11 @@ impl SystemdCollector {
|
||||
let service_data = ServiceData {
|
||||
name: service_name.clone(),
|
||||
user_stopped: false, // TODO: Integrate with service tracker
|
||||
service_status: self.calculate_service_status(service_name, &active_status),
|
||||
service_status: self.calculate_service_status(service_name, &status_info.active_state),
|
||||
sub_services,
|
||||
memory_bytes: status_info.memory_bytes,
|
||||
restart_count: status_info.restart_count,
|
||||
uptime_seconds,
|
||||
};
|
||||
|
||||
// Add to AgentData and cache
|
||||
@ -290,14 +303,13 @@ impl SystemdCollector {
|
||||
let fields: Vec<&str> = line.split_whitespace().collect();
|
||||
if fields.len() >= 4 && fields[0].ends_with(".service") {
|
||||
let service_name = fields[0].trim_end_matches(".service");
|
||||
let load_state = fields.get(1).unwrap_or(&"unknown").to_string();
|
||||
let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
|
||||
let sub_state = fields.get(3).unwrap_or(&"unknown").to_string();
|
||||
|
||||
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||
load_state,
|
||||
active_state,
|
||||
sub_state,
|
||||
memory_bytes: None,
|
||||
restart_count: None,
|
||||
start_timestamp: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -306,9 +318,10 @@ impl SystemdCollector {
|
||||
for service_name in &all_service_names {
|
||||
if !status_cache.contains_key(service_name) {
|
||||
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||
load_state: "not-loaded".to_string(),
|
||||
active_state: "inactive".to_string(),
|
||||
sub_state: "dead".to_string(),
|
||||
memory_bytes: None,
|
||||
restart_count: None,
|
||||
start_timestamp: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -341,35 +354,63 @@ impl SystemdCollector {
|
||||
}
|
||||
|
||||
/// Get service status from cache (if available) or fallback to systemctl
|
||||
fn get_service_status(&self, service: &str) -> Result<(String, String)> {
|
||||
fn get_service_status(&self, service: &str) -> Result<ServiceStatusInfo> {
|
||||
// Try to get status from cache first
|
||||
if let Ok(state) = self.state.read() {
|
||||
if let Some(cached_info) = state.service_status_cache.get(service) {
|
||||
let active_status = cached_info.active_state.clone();
|
||||
let detailed_info = format!(
|
||||
"LoadState={}\nActiveState={}\nSubState={}",
|
||||
cached_info.load_state,
|
||||
cached_info.active_state,
|
||||
cached_info.sub_state
|
||||
);
|
||||
return Ok((active_status, detailed_info));
|
||||
return Ok(cached_info.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to systemctl if not in cache (with 2 second timeout)
|
||||
let output = Command::new("timeout")
|
||||
.args(&["2", "systemctl", "is-active", &format!("{}.service", service)])
|
||||
.args(&[
|
||||
"2",
|
||||
"systemctl",
|
||||
"show",
|
||||
&format!("{}.service", service),
|
||||
"--property=LoadState,ActiveState,SubState,MemoryCurrent,NRestarts,ExecMainStartTimestamp"
|
||||
])
|
||||
.output()?;
|
||||
|
||||
let active_status = String::from_utf8(output.stdout)?.trim().to_string();
|
||||
let output_str = String::from_utf8(output.stdout)?;
|
||||
|
||||
// Get more detailed info (with 2 second timeout)
|
||||
let output = Command::new("timeout")
|
||||
.args(&["2", "systemctl", "show", &format!("{}.service", service), "--property=LoadState,ActiveState,SubState"])
|
||||
.output()?;
|
||||
// Parse properties
|
||||
let mut active_state = String::new();
|
||||
let mut memory_bytes = None;
|
||||
let mut restart_count = None;
|
||||
let mut start_timestamp = None;
|
||||
|
||||
let detailed_info = String::from_utf8(output.stdout)?;
|
||||
Ok((active_status, detailed_info))
|
||||
for line in output_str.lines() {
|
||||
if let Some(value) = line.strip_prefix("ActiveState=") {
|
||||
active_state = value.to_string();
|
||||
} else if let Some(value) = line.strip_prefix("MemoryCurrent=") {
|
||||
if value != "[not set]" {
|
||||
memory_bytes = value.parse().ok();
|
||||
}
|
||||
} else if let Some(value) = line.strip_prefix("NRestarts=") {
|
||||
restart_count = value.parse().ok();
|
||||
} else if let Some(value) = line.strip_prefix("ExecMainStartTimestamp=") {
|
||||
if value != "[not set]" && !value.is_empty() {
|
||||
// Parse timestamp to seconds since epoch
|
||||
if let Ok(output) = Command::new("date")
|
||||
.args(&["+%s", "-d", value])
|
||||
.output()
|
||||
{
|
||||
if let Ok(timestamp_str) = String::from_utf8(output.stdout) {
|
||||
start_timestamp = timestamp_str.trim().parse().ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ServiceStatusInfo {
|
||||
active_state,
|
||||
memory_bytes,
|
||||
restart_count,
|
||||
start_timestamp,
|
||||
})
|
||||
}
|
||||
|
||||
/// Check if service name matches pattern (supports wildcards like nginx*)
|
||||
|
||||
@ -31,6 +31,9 @@ struct ServiceInfo {
|
||||
metrics: Vec<(String, f32, Option<String>)>, // (label, value, unit)
|
||||
widget_status: Status,
|
||||
service_type: String, // "nginx_site", "container", "image", or empty for parent services
|
||||
memory_bytes: Option<u64>,
|
||||
restart_count: Option<u32>,
|
||||
uptime_seconds: Option<u64>,
|
||||
}
|
||||
|
||||
impl ServicesWidget {
|
||||
@ -84,7 +87,7 @@ impl ServicesWidget {
|
||||
// Convert Status enum to display text
|
||||
let status_str = match info.widget_status {
|
||||
Status::Ok => "active",
|
||||
Status::Inactive => "inactive",
|
||||
Status::Inactive => "inactive",
|
||||
Status::Critical => "failed",
|
||||
Status::Pending => "pending",
|
||||
Status::Warning => "warning",
|
||||
@ -92,9 +95,43 @@ impl ServicesWidget {
|
||||
Status::Offline => "offline",
|
||||
};
|
||||
|
||||
// Format memory
|
||||
let memory_str = info.memory_bytes.map_or("-".to_string(), |bytes| {
|
||||
let mb = bytes as f64 / (1024.0 * 1024.0);
|
||||
if mb >= 1000.0 {
|
||||
format!("{:.1}G", mb / 1024.0)
|
||||
} else {
|
||||
format!("{:.0}M", mb)
|
||||
}
|
||||
});
|
||||
|
||||
// Format uptime
|
||||
let uptime_str = info.uptime_seconds.map_or("-".to_string(), |secs| {
|
||||
let days = secs / 86400;
|
||||
let hours = (secs % 86400) / 3600;
|
||||
let mins = (secs % 3600) / 60;
|
||||
|
||||
if days > 0 {
|
||||
format!("{}d{}h", days, hours)
|
||||
} else if hours > 0 {
|
||||
format!("{}h{}m", hours, mins)
|
||||
} else {
|
||||
format!("{}m", mins)
|
||||
}
|
||||
});
|
||||
|
||||
// Format restarts (show "!" if > 0 to indicate instability)
|
||||
let restart_str = info.restart_count.map_or("-".to_string(), |count| {
|
||||
if count > 0 {
|
||||
format!("!{}", count)
|
||||
} else {
|
||||
"0".to_string()
|
||||
}
|
||||
});
|
||||
|
||||
format!(
|
||||
"{:<23} {:<10}",
|
||||
short_name, status_str
|
||||
"{:<23} {:<10} {:<8} {:<8} {:<5}",
|
||||
short_name, status_str, memory_str, uptime_str, restart_str
|
||||
)
|
||||
}
|
||||
|
||||
@ -280,6 +317,9 @@ impl Widget for ServicesWidget {
|
||||
metrics: Vec::new(), // Parent services don't have custom metrics
|
||||
widget_status: service.service_status,
|
||||
service_type: String::new(), // Parent services have no type
|
||||
memory_bytes: service.memory_bytes,
|
||||
restart_count: service.restart_count,
|
||||
uptime_seconds: service.uptime_seconds,
|
||||
};
|
||||
self.parent_services.insert(service.name.clone(), parent_info);
|
||||
|
||||
@ -296,6 +336,9 @@ impl Widget for ServicesWidget {
|
||||
metrics,
|
||||
widget_status: sub_service.service_status,
|
||||
service_type: sub_service.service_type.clone(),
|
||||
memory_bytes: None, // Sub-services don't have individual metrics yet
|
||||
restart_count: None,
|
||||
uptime_seconds: None,
|
||||
};
|
||||
sub_list.push((sub_service.name.clone(), sub_info));
|
||||
}
|
||||
@ -338,6 +381,9 @@ impl ServicesWidget {
|
||||
metrics: Vec::new(),
|
||||
widget_status: Status::Unknown,
|
||||
service_type: String::new(),
|
||||
memory_bytes: None,
|
||||
restart_count: None,
|
||||
uptime_seconds: None,
|
||||
});
|
||||
|
||||
if metric.name.ends_with("_status") {
|
||||
@ -364,6 +410,9 @@ impl ServicesWidget {
|
||||
metrics: Vec::new(),
|
||||
widget_status: Status::Unknown,
|
||||
service_type: String::new(), // Unknown type in legacy path
|
||||
memory_bytes: None,
|
||||
restart_count: None,
|
||||
uptime_seconds: None,
|
||||
},
|
||||
));
|
||||
&mut sub_service_list.last_mut().unwrap().1
|
||||
@ -429,8 +478,8 @@ impl ServicesWidget {
|
||||
|
||||
// Header
|
||||
let header = format!(
|
||||
"{:<25} {:<10}",
|
||||
"Service:", "Status:"
|
||||
"{:<25} {:<10} {:<8} {:<8} {:<5}",
|
||||
"Service:", "Status:", "RAM:", "Uptime:", "↻:"
|
||||
);
|
||||
let header_para = Paragraph::new(header).style(Typography::muted());
|
||||
frame.render_widget(header_para, content_chunks[0]);
|
||||
|
||||
@ -139,6 +139,12 @@ pub struct ServiceData {
|
||||
pub user_stopped: bool,
|
||||
pub service_status: Status,
|
||||
pub sub_services: Vec<SubServiceData>,
|
||||
/// Memory usage in bytes (from MemoryCurrent)
|
||||
pub memory_bytes: Option<u64>,
|
||||
/// Number of service restarts (from NRestarts)
|
||||
pub restart_count: Option<u32>,
|
||||
/// Uptime in seconds (calculated from ExecMainStartTimestamp)
|
||||
pub uptime_seconds: Option<u64>,
|
||||
}
|
||||
|
||||
/// Sub-service data (nginx sites, docker containers, etc.)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user