Add service metrics from systemctl (memory, uptime, restarts)
Shared:
- Add memory_bytes, restart_count, uptime_seconds to ServiceData

Agent:
- Add new fields to ServiceStatusInfo struct
- Fetch MemoryCurrent, NRestarts, ExecMainStartTimestamp from systemctl show
- Calculate uptime from start timestamp
- Parse and populate new fields in ServiceData
- Remove unused load_state and sub_state fields

Dashboard:
- Add memory_bytes, restart_count, uptime_seconds to ServiceInfo
- Update header: Service, Status, RAM, Uptime, ↻ (restarts)
- Format memory as MB/GB
- Format uptime as Xd Xh, Xh Xm, or Xm
- Show restart count with ! prefix if > 0 to indicate instability

All metrics are obtained from a single systemctl show call - zero overhead.
This commit is contained in:
parent
c3c9507a42
commit
0e01813ff5
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard"
|
name = "cm-dashboard"
|
||||||
version = "0.1.204"
|
version = "0.1.205"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"chrono",
|
"chrono",
|
||||||
@ -301,7 +301,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard-agent"
|
name = "cm-dashboard-agent"
|
||||||
version = "0.1.204"
|
version = "0.1.205"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
@ -324,7 +324,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard-shared"
|
name = "cm-dashboard-shared"
|
||||||
version = "0.1.204"
|
version = "0.1.205"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"chrono",
|
"chrono",
|
||||||
"serde",
|
"serde",
|
||||||
|
|||||||
@ -43,9 +43,10 @@ struct ServiceCacheState {
|
|||||||
/// Cached service status information from systemctl list-units
|
/// Cached service status information from systemctl list-units
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
struct ServiceStatusInfo {
|
struct ServiceStatusInfo {
|
||||||
load_state: String,
|
|
||||||
active_state: String,
|
active_state: String,
|
||||||
sub_state: String,
|
memory_bytes: Option<u64>,
|
||||||
|
restart_count: Option<u32>,
|
||||||
|
start_timestamp: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SystemdCollector {
|
impl SystemdCollector {
|
||||||
@ -86,11 +87,20 @@ impl SystemdCollector {
|
|||||||
let mut complete_service_data = Vec::new();
|
let mut complete_service_data = Vec::new();
|
||||||
for service_name in &monitored_services {
|
for service_name in &monitored_services {
|
||||||
match self.get_service_status(service_name) {
|
match self.get_service_status(service_name) {
|
||||||
Ok((active_status, _detailed_info)) => {
|
Ok(status_info) => {
|
||||||
let mut sub_services = Vec::new();
|
let mut sub_services = Vec::new();
|
||||||
|
|
||||||
|
// Calculate uptime if we have start timestamp
|
||||||
|
let uptime_seconds = status_info.start_timestamp.and_then(|start| {
|
||||||
|
let now = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.ok()?
|
||||||
|
.as_secs();
|
||||||
|
Some(now.saturating_sub(start))
|
||||||
|
});
|
||||||
|
|
||||||
// Sub-service metrics for specific services (always include cached results)
|
// Sub-service metrics for specific services (always include cached results)
|
||||||
if service_name.contains("nginx") && active_status == "active" {
|
if service_name.contains("nginx") && status_info.active_state == "active" {
|
||||||
let nginx_sites = self.get_nginx_site_metrics();
|
let nginx_sites = self.get_nginx_site_metrics();
|
||||||
for (site_name, latency_ms) in nginx_sites {
|
for (site_name, latency_ms) in nginx_sites {
|
||||||
let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms {
|
let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms {
|
||||||
@ -115,7 +125,7 @@ impl SystemdCollector {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if service_name.contains("docker") && active_status == "active" {
|
if service_name.contains("docker") && status_info.active_state == "active" {
|
||||||
let docker_containers = self.get_docker_containers();
|
let docker_containers = self.get_docker_containers();
|
||||||
for (container_name, container_status) in docker_containers {
|
for (container_name, container_status) in docker_containers {
|
||||||
// For now, docker containers have no additional metrics
|
// For now, docker containers have no additional metrics
|
||||||
@ -153,8 +163,11 @@ impl SystemdCollector {
|
|||||||
let service_data = ServiceData {
|
let service_data = ServiceData {
|
||||||
name: service_name.clone(),
|
name: service_name.clone(),
|
||||||
user_stopped: false, // TODO: Integrate with service tracker
|
user_stopped: false, // TODO: Integrate with service tracker
|
||||||
service_status: self.calculate_service_status(service_name, &active_status),
|
service_status: self.calculate_service_status(service_name, &status_info.active_state),
|
||||||
sub_services,
|
sub_services,
|
||||||
|
memory_bytes: status_info.memory_bytes,
|
||||||
|
restart_count: status_info.restart_count,
|
||||||
|
uptime_seconds,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Add to AgentData and cache
|
// Add to AgentData and cache
|
||||||
@ -290,14 +303,13 @@ impl SystemdCollector {
|
|||||||
let fields: Vec<&str> = line.split_whitespace().collect();
|
let fields: Vec<&str> = line.split_whitespace().collect();
|
||||||
if fields.len() >= 4 && fields[0].ends_with(".service") {
|
if fields.len() >= 4 && fields[0].ends_with(".service") {
|
||||||
let service_name = fields[0].trim_end_matches(".service");
|
let service_name = fields[0].trim_end_matches(".service");
|
||||||
let load_state = fields.get(1).unwrap_or(&"unknown").to_string();
|
|
||||||
let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
|
let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
|
||||||
let sub_state = fields.get(3).unwrap_or(&"unknown").to_string();
|
|
||||||
|
|
||||||
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||||
load_state,
|
|
||||||
active_state,
|
active_state,
|
||||||
sub_state,
|
memory_bytes: None,
|
||||||
|
restart_count: None,
|
||||||
|
start_timestamp: None,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -306,9 +318,10 @@ impl SystemdCollector {
|
|||||||
for service_name in &all_service_names {
|
for service_name in &all_service_names {
|
||||||
if !status_cache.contains_key(service_name) {
|
if !status_cache.contains_key(service_name) {
|
||||||
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||||
load_state: "not-loaded".to_string(),
|
|
||||||
active_state: "inactive".to_string(),
|
active_state: "inactive".to_string(),
|
||||||
sub_state: "dead".to_string(),
|
memory_bytes: None,
|
||||||
|
restart_count: None,
|
||||||
|
start_timestamp: None,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -341,35 +354,63 @@ impl SystemdCollector {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Get service status from cache (if available) or fallback to systemctl
|
/// Get service status from cache (if available) or fallback to systemctl
|
||||||
fn get_service_status(&self, service: &str) -> Result<(String, String)> {
|
fn get_service_status(&self, service: &str) -> Result<ServiceStatusInfo> {
|
||||||
// Try to get status from cache first
|
// Try to get status from cache first
|
||||||
if let Ok(state) = self.state.read() {
|
if let Ok(state) = self.state.read() {
|
||||||
if let Some(cached_info) = state.service_status_cache.get(service) {
|
if let Some(cached_info) = state.service_status_cache.get(service) {
|
||||||
let active_status = cached_info.active_state.clone();
|
return Ok(cached_info.clone());
|
||||||
let detailed_info = format!(
|
|
||||||
"LoadState={}\nActiveState={}\nSubState={}",
|
|
||||||
cached_info.load_state,
|
|
||||||
cached_info.active_state,
|
|
||||||
cached_info.sub_state
|
|
||||||
);
|
|
||||||
return Ok((active_status, detailed_info));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback to systemctl if not in cache (with 2 second timeout)
|
// Fallback to systemctl if not in cache (with 2 second timeout)
|
||||||
let output = Command::new("timeout")
|
let output = Command::new("timeout")
|
||||||
.args(&["2", "systemctl", "is-active", &format!("{}.service", service)])
|
.args(&[
|
||||||
|
"2",
|
||||||
|
"systemctl",
|
||||||
|
"show",
|
||||||
|
&format!("{}.service", service),
|
||||||
|
"--property=LoadState,ActiveState,SubState,MemoryCurrent,NRestarts,ExecMainStartTimestamp"
|
||||||
|
])
|
||||||
.output()?;
|
.output()?;
|
||||||
|
|
||||||
let active_status = String::from_utf8(output.stdout)?.trim().to_string();
|
let output_str = String::from_utf8(output.stdout)?;
|
||||||
|
|
||||||
// Get more detailed info (with 2 second timeout)
|
// Parse properties
|
||||||
let output = Command::new("timeout")
|
let mut active_state = String::new();
|
||||||
.args(&["2", "systemctl", "show", &format!("{}.service", service), "--property=LoadState,ActiveState,SubState"])
|
let mut memory_bytes = None;
|
||||||
.output()?;
|
let mut restart_count = None;
|
||||||
|
let mut start_timestamp = None;
|
||||||
|
|
||||||
let detailed_info = String::from_utf8(output.stdout)?;
|
for line in output_str.lines() {
|
||||||
Ok((active_status, detailed_info))
|
if let Some(value) = line.strip_prefix("ActiveState=") {
|
||||||
|
active_state = value.to_string();
|
||||||
|
} else if let Some(value) = line.strip_prefix("MemoryCurrent=") {
|
||||||
|
if value != "[not set]" {
|
||||||
|
memory_bytes = value.parse().ok();
|
||||||
|
}
|
||||||
|
} else if let Some(value) = line.strip_prefix("NRestarts=") {
|
||||||
|
restart_count = value.parse().ok();
|
||||||
|
} else if let Some(value) = line.strip_prefix("ExecMainStartTimestamp=") {
|
||||||
|
if value != "[not set]" && !value.is_empty() {
|
||||||
|
// Parse timestamp to seconds since epoch
|
||||||
|
if let Ok(output) = Command::new("date")
|
||||||
|
.args(&["+%s", "-d", value])
|
||||||
|
.output()
|
||||||
|
{
|
||||||
|
if let Ok(timestamp_str) = String::from_utf8(output.stdout) {
|
||||||
|
start_timestamp = timestamp_str.trim().parse().ok();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(ServiceStatusInfo {
|
||||||
|
active_state,
|
||||||
|
memory_bytes,
|
||||||
|
restart_count,
|
||||||
|
start_timestamp,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if service name matches pattern (supports wildcards like nginx*)
|
/// Check if service name matches pattern (supports wildcards like nginx*)
|
||||||
|
|||||||
@ -31,6 +31,9 @@ struct ServiceInfo {
|
|||||||
metrics: Vec<(String, f32, Option<String>)>, // (label, value, unit)
|
metrics: Vec<(String, f32, Option<String>)>, // (label, value, unit)
|
||||||
widget_status: Status,
|
widget_status: Status,
|
||||||
service_type: String, // "nginx_site", "container", "image", or empty for parent services
|
service_type: String, // "nginx_site", "container", "image", or empty for parent services
|
||||||
|
memory_bytes: Option<u64>,
|
||||||
|
restart_count: Option<u32>,
|
||||||
|
uptime_seconds: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ServicesWidget {
|
impl ServicesWidget {
|
||||||
@ -92,9 +95,43 @@ impl ServicesWidget {
|
|||||||
Status::Offline => "offline",
|
Status::Offline => "offline",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Format memory
|
||||||
|
let memory_str = info.memory_bytes.map_or("-".to_string(), |bytes| {
|
||||||
|
let mb = bytes as f64 / (1024.0 * 1024.0);
|
||||||
|
if mb >= 1000.0 {
|
||||||
|
format!("{:.1}G", mb / 1024.0)
|
||||||
|
} else {
|
||||||
|
format!("{:.0}M", mb)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Format uptime
|
||||||
|
let uptime_str = info.uptime_seconds.map_or("-".to_string(), |secs| {
|
||||||
|
let days = secs / 86400;
|
||||||
|
let hours = (secs % 86400) / 3600;
|
||||||
|
let mins = (secs % 3600) / 60;
|
||||||
|
|
||||||
|
if days > 0 {
|
||||||
|
format!("{}d{}h", days, hours)
|
||||||
|
} else if hours > 0 {
|
||||||
|
format!("{}h{}m", hours, mins)
|
||||||
|
} else {
|
||||||
|
format!("{}m", mins)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Format restarts (show "!" if > 0 to indicate instability)
|
||||||
|
let restart_str = info.restart_count.map_or("-".to_string(), |count| {
|
||||||
|
if count > 0 {
|
||||||
|
format!("!{}", count)
|
||||||
|
} else {
|
||||||
|
"0".to_string()
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
format!(
|
format!(
|
||||||
"{:<23} {:<10}",
|
"{:<23} {:<10} {:<8} {:<8} {:<5}",
|
||||||
short_name, status_str
|
short_name, status_str, memory_str, uptime_str, restart_str
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -280,6 +317,9 @@ impl Widget for ServicesWidget {
|
|||||||
metrics: Vec::new(), // Parent services don't have custom metrics
|
metrics: Vec::new(), // Parent services don't have custom metrics
|
||||||
widget_status: service.service_status,
|
widget_status: service.service_status,
|
||||||
service_type: String::new(), // Parent services have no type
|
service_type: String::new(), // Parent services have no type
|
||||||
|
memory_bytes: service.memory_bytes,
|
||||||
|
restart_count: service.restart_count,
|
||||||
|
uptime_seconds: service.uptime_seconds,
|
||||||
};
|
};
|
||||||
self.parent_services.insert(service.name.clone(), parent_info);
|
self.parent_services.insert(service.name.clone(), parent_info);
|
||||||
|
|
||||||
@ -296,6 +336,9 @@ impl Widget for ServicesWidget {
|
|||||||
metrics,
|
metrics,
|
||||||
widget_status: sub_service.service_status,
|
widget_status: sub_service.service_status,
|
||||||
service_type: sub_service.service_type.clone(),
|
service_type: sub_service.service_type.clone(),
|
||||||
|
memory_bytes: None, // Sub-services don't have individual metrics yet
|
||||||
|
restart_count: None,
|
||||||
|
uptime_seconds: None,
|
||||||
};
|
};
|
||||||
sub_list.push((sub_service.name.clone(), sub_info));
|
sub_list.push((sub_service.name.clone(), sub_info));
|
||||||
}
|
}
|
||||||
@ -338,6 +381,9 @@ impl ServicesWidget {
|
|||||||
metrics: Vec::new(),
|
metrics: Vec::new(),
|
||||||
widget_status: Status::Unknown,
|
widget_status: Status::Unknown,
|
||||||
service_type: String::new(),
|
service_type: String::new(),
|
||||||
|
memory_bytes: None,
|
||||||
|
restart_count: None,
|
||||||
|
uptime_seconds: None,
|
||||||
});
|
});
|
||||||
|
|
||||||
if metric.name.ends_with("_status") {
|
if metric.name.ends_with("_status") {
|
||||||
@ -364,6 +410,9 @@ impl ServicesWidget {
|
|||||||
metrics: Vec::new(),
|
metrics: Vec::new(),
|
||||||
widget_status: Status::Unknown,
|
widget_status: Status::Unknown,
|
||||||
service_type: String::new(), // Unknown type in legacy path
|
service_type: String::new(), // Unknown type in legacy path
|
||||||
|
memory_bytes: None,
|
||||||
|
restart_count: None,
|
||||||
|
uptime_seconds: None,
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
&mut sub_service_list.last_mut().unwrap().1
|
&mut sub_service_list.last_mut().unwrap().1
|
||||||
@ -429,8 +478,8 @@ impl ServicesWidget {
|
|||||||
|
|
||||||
// Header
|
// Header
|
||||||
let header = format!(
|
let header = format!(
|
||||||
"{:<25} {:<10}",
|
"{:<25} {:<10} {:<8} {:<8} {:<5}",
|
||||||
"Service:", "Status:"
|
"Service:", "Status:", "RAM:", "Uptime:", "↻:"
|
||||||
);
|
);
|
||||||
let header_para = Paragraph::new(header).style(Typography::muted());
|
let header_para = Paragraph::new(header).style(Typography::muted());
|
||||||
frame.render_widget(header_para, content_chunks[0]);
|
frame.render_widget(header_para, content_chunks[0]);
|
||||||
|
|||||||
@ -139,6 +139,12 @@ pub struct ServiceData {
|
|||||||
pub user_stopped: bool,
|
pub user_stopped: bool,
|
||||||
pub service_status: Status,
|
pub service_status: Status,
|
||||||
pub sub_services: Vec<SubServiceData>,
|
pub sub_services: Vec<SubServiceData>,
|
||||||
|
/// Memory usage in bytes (from MemoryCurrent)
|
||||||
|
pub memory_bytes: Option<u64>,
|
||||||
|
/// Number of service restarts (from NRestarts)
|
||||||
|
pub restart_count: Option<u32>,
|
||||||
|
/// Uptime in seconds (calculated from ExecMainStartTimestamp)
|
||||||
|
pub uptime_seconds: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sub-service data (nginx sites, docker containers, etc.)
|
/// Sub-service data (nginx sites, docker containers, etc.)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user