Add service metrics from systemctl (memory, uptime, restarts)
Shared:
- Add memory_bytes, restart_count, uptime_seconds to ServiceData

Agent:
- Add new fields to ServiceStatusInfo struct
- Fetch MemoryCurrent, NRestarts, ExecMainStartTimestamp from systemctl show
- Calculate uptime from start timestamp
- Parse and populate new fields in ServiceData
- Remove unused load_state and sub_state fields

Dashboard:
- Add memory_bytes, restart_count, uptime_seconds to ServiceInfo
- Update header: Service, Status, RAM, Uptime, ↻ (restarts)
- Format memory as MB/GB
- Format uptime as Xd Xh, Xh Xm, or Xm
- Show restart count with ! prefix if > 0 to indicate instability

All metrics obtained from single systemctl show call - zero overhead.
This commit is contained in:
@@ -43,9 +43,10 @@ struct ServiceCacheState {
|
||||
/// Cached service status information from systemctl list-units
|
||||
#[derive(Debug, Clone)]
|
||||
struct ServiceStatusInfo {
|
||||
load_state: String,
|
||||
active_state: String,
|
||||
sub_state: String,
|
||||
memory_bytes: Option<u64>,
|
||||
restart_count: Option<u32>,
|
||||
start_timestamp: Option<u64>,
|
||||
}
|
||||
|
||||
impl SystemdCollector {
|
||||
@@ -86,11 +87,20 @@ impl SystemdCollector {
|
||||
let mut complete_service_data = Vec::new();
|
||||
for service_name in &monitored_services {
|
||||
match self.get_service_status(service_name) {
|
||||
Ok((active_status, _detailed_info)) => {
|
||||
Ok(status_info) => {
|
||||
let mut sub_services = Vec::new();
|
||||
|
||||
// Calculate uptime if we have start timestamp
|
||||
let uptime_seconds = status_info.start_timestamp.and_then(|start| {
|
||||
let now = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.ok()?
|
||||
.as_secs();
|
||||
Some(now.saturating_sub(start))
|
||||
});
|
||||
|
||||
// Sub-service metrics for specific services (always include cached results)
|
||||
if service_name.contains("nginx") && active_status == "active" {
|
||||
if service_name.contains("nginx") && status_info.active_state == "active" {
|
||||
let nginx_sites = self.get_nginx_site_metrics();
|
||||
for (site_name, latency_ms) in nginx_sites {
|
||||
let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms {
|
||||
@@ -115,7 +125,7 @@ impl SystemdCollector {
|
||||
}
|
||||
}
|
||||
|
||||
if service_name.contains("docker") && active_status == "active" {
|
||||
if service_name.contains("docker") && status_info.active_state == "active" {
|
||||
let docker_containers = self.get_docker_containers();
|
||||
for (container_name, container_status) in docker_containers {
|
||||
// For now, docker containers have no additional metrics
|
||||
@@ -153,8 +163,11 @@ impl SystemdCollector {
|
||||
let service_data = ServiceData {
|
||||
name: service_name.clone(),
|
||||
user_stopped: false, // TODO: Integrate with service tracker
|
||||
service_status: self.calculate_service_status(service_name, &active_status),
|
||||
service_status: self.calculate_service_status(service_name, &status_info.active_state),
|
||||
sub_services,
|
||||
memory_bytes: status_info.memory_bytes,
|
||||
restart_count: status_info.restart_count,
|
||||
uptime_seconds,
|
||||
};
|
||||
|
||||
// Add to AgentData and cache
|
||||
@@ -290,14 +303,13 @@ impl SystemdCollector {
|
||||
let fields: Vec<&str> = line.split_whitespace().collect();
|
||||
if fields.len() >= 4 && fields[0].ends_with(".service") {
|
||||
let service_name = fields[0].trim_end_matches(".service");
|
||||
let load_state = fields.get(1).unwrap_or(&"unknown").to_string();
|
||||
let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
|
||||
let sub_state = fields.get(3).unwrap_or(&"unknown").to_string();
|
||||
|
||||
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||
load_state,
|
||||
active_state,
|
||||
sub_state,
|
||||
memory_bytes: None,
|
||||
restart_count: None,
|
||||
start_timestamp: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -306,9 +318,10 @@ impl SystemdCollector {
|
||||
for service_name in &all_service_names {
|
||||
if !status_cache.contains_key(service_name) {
|
||||
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||
load_state: "not-loaded".to_string(),
|
||||
active_state: "inactive".to_string(),
|
||||
sub_state: "dead".to_string(),
|
||||
memory_bytes: None,
|
||||
restart_count: None,
|
||||
start_timestamp: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -341,35 +354,63 @@ impl SystemdCollector {
|
||||
}
|
||||
|
||||
/// Get service status from cache (if available) or fallback to systemctl
|
||||
fn get_service_status(&self, service: &str) -> Result<(String, String)> {
|
||||
fn get_service_status(&self, service: &str) -> Result<ServiceStatusInfo> {
|
||||
// Try to get status from cache first
|
||||
if let Ok(state) = self.state.read() {
|
||||
if let Some(cached_info) = state.service_status_cache.get(service) {
|
||||
let active_status = cached_info.active_state.clone();
|
||||
let detailed_info = format!(
|
||||
"LoadState={}\nActiveState={}\nSubState={}",
|
||||
cached_info.load_state,
|
||||
cached_info.active_state,
|
||||
cached_info.sub_state
|
||||
);
|
||||
return Ok((active_status, detailed_info));
|
||||
return Ok(cached_info.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to systemctl if not in cache (with 2 second timeout)
|
||||
let output = Command::new("timeout")
|
||||
.args(&["2", "systemctl", "is-active", &format!("{}.service", service)])
|
||||
.args(&[
|
||||
"2",
|
||||
"systemctl",
|
||||
"show",
|
||||
&format!("{}.service", service),
|
||||
"--property=LoadState,ActiveState,SubState,MemoryCurrent,NRestarts,ExecMainStartTimestamp"
|
||||
])
|
||||
.output()?;
|
||||
|
||||
let active_status = String::from_utf8(output.stdout)?.trim().to_string();
|
||||
let output_str = String::from_utf8(output.stdout)?;
|
||||
|
||||
// Get more detailed info (with 2 second timeout)
|
||||
let output = Command::new("timeout")
|
||||
.args(&["2", "systemctl", "show", &format!("{}.service", service), "--property=LoadState,ActiveState,SubState"])
|
||||
.output()?;
|
||||
// Parse properties
|
||||
let mut active_state = String::new();
|
||||
let mut memory_bytes = None;
|
||||
let mut restart_count = None;
|
||||
let mut start_timestamp = None;
|
||||
|
||||
let detailed_info = String::from_utf8(output.stdout)?;
|
||||
Ok((active_status, detailed_info))
|
||||
for line in output_str.lines() {
|
||||
if let Some(value) = line.strip_prefix("ActiveState=") {
|
||||
active_state = value.to_string();
|
||||
} else if let Some(value) = line.strip_prefix("MemoryCurrent=") {
|
||||
if value != "[not set]" {
|
||||
memory_bytes = value.parse().ok();
|
||||
}
|
||||
} else if let Some(value) = line.strip_prefix("NRestarts=") {
|
||||
restart_count = value.parse().ok();
|
||||
} else if let Some(value) = line.strip_prefix("ExecMainStartTimestamp=") {
|
||||
if value != "[not set]" && !value.is_empty() {
|
||||
// Parse timestamp to seconds since epoch
|
||||
if let Ok(output) = Command::new("date")
|
||||
.args(&["+%s", "-d", value])
|
||||
.output()
|
||||
{
|
||||
if let Ok(timestamp_str) = String::from_utf8(output.stdout) {
|
||||
start_timestamp = timestamp_str.trim().parse().ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ServiceStatusInfo {
|
||||
active_state,
|
||||
memory_bytes,
|
||||
restart_count,
|
||||
start_timestamp,
|
||||
})
|
||||
}
|
||||
|
||||
/// Check if service name matches pattern (supports wildcards like nginx*)
|
||||
|
||||
Reference in New Issue
Block a user