Phase 4: Cache service status from discovery to eliminate per-service calls
Major performance optimization:

- Parse and cache service status during discovery from systemctl list-units
- Eliminate per-service systemctl is-active and systemctl show calls
- Reduce systemctl calls from 1+2N to just 1 call total
- For 10 services: 21 calls → 1 call (95% reduction)
- Add fallback to systemctl for cache misses

This completes the major systemctl call reduction goal from TODO.md.
This commit is contained in:
parent
174b27f31a
commit
da6f3c3855
@ -22,6 +22,8 @@ pub struct SystemdCollector {
|
|||||||
struct ServiceCacheState {
|
struct ServiceCacheState {
|
||||||
/// Interesting services to monitor (cached after discovery)
|
/// Interesting services to monitor (cached after discovery)
|
||||||
monitored_services: Vec<String>,
|
monitored_services: Vec<String>,
|
||||||
|
/// Cached service status information from discovery
|
||||||
|
service_status_cache: std::collections::HashMap<String, ServiceStatusInfo>,
|
||||||
/// Last time services were discovered
|
/// Last time services were discovered
|
||||||
last_discovery_time: Option<Instant>,
|
last_discovery_time: Option<Instant>,
|
||||||
/// How often to rediscover services (5 minutes)
|
/// How often to rediscover services (5 minutes)
|
||||||
@ -34,11 +36,21 @@ struct ServiceCacheState {
|
|||||||
nginx_check_interval_seconds: u64,
|
nginx_check_interval_seconds: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Cached service status information from systemctl list-units
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct ServiceStatusInfo {
|
||||||
|
load_state: String,
|
||||||
|
active_state: String,
|
||||||
|
sub_state: String,
|
||||||
|
description: String,
|
||||||
|
}
|
||||||
|
|
||||||
impl SystemdCollector {
|
impl SystemdCollector {
|
||||||
pub fn new(config: SystemdConfig) -> Self {
|
pub fn new(config: SystemdConfig) -> Self {
|
||||||
Self {
|
Self {
|
||||||
state: RwLock::new(ServiceCacheState {
|
state: RwLock::new(ServiceCacheState {
|
||||||
monitored_services: Vec::new(),
|
monitored_services: Vec::new(),
|
||||||
|
service_status_cache: std::collections::HashMap::new(),
|
||||||
last_discovery_time: None,
|
last_discovery_time: None,
|
||||||
discovery_interval_seconds: 300, // 5 minutes
|
discovery_interval_seconds: 300, // 5 minutes
|
||||||
nginx_site_metrics: Vec::new(),
|
nginx_site_metrics: Vec::new(),
|
||||||
@ -135,13 +147,33 @@ impl SystemdCollector {
|
|||||||
let excluded_services = &self.config.excluded_services;
|
let excluded_services = &self.config.excluded_services;
|
||||||
let service_name_filters = &self.config.service_name_filters;
|
let service_name_filters = &self.config.service_name_filters;
|
||||||
|
|
||||||
// Parse all services from single systemctl command
|
// Parse all services and cache their status information
|
||||||
let mut all_service_names = std::collections::HashSet::new();
|
let mut all_service_names = std::collections::HashSet::new();
|
||||||
|
let mut status_cache = std::collections::HashMap::new();
|
||||||
|
|
||||||
for line in units_str.lines() {
|
for line in units_str.lines() {
|
||||||
let fields: Vec<&str> = line.split_whitespace().collect();
|
let fields: Vec<&str> = line.split_whitespace().collect();
|
||||||
if fields.len() >= 4 && fields[0].ends_with(".service") {
|
if fields.len() >= 4 && fields[0].ends_with(".service") {
|
||||||
let service_name = fields[0].trim_end_matches(".service");
|
let service_name = fields[0].trim_end_matches(".service");
|
||||||
|
|
||||||
|
// Extract status information from systemctl list-units output
|
||||||
|
let load_state = fields.get(1).unwrap_or(&"unknown").to_string();
|
||||||
|
let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
|
||||||
|
let sub_state = fields.get(3).unwrap_or(&"unknown").to_string();
|
||||||
|
let description = if fields.len() > 4 {
|
||||||
|
fields[4..].join(" ")
|
||||||
|
} else {
|
||||||
|
"".to_string()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Cache the status information
|
||||||
|
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||||
|
load_state,
|
||||||
|
active_state,
|
||||||
|
sub_state,
|
||||||
|
description,
|
||||||
|
});
|
||||||
|
|
||||||
all_service_names.insert(service_name.to_string());
|
all_service_names.insert(service_name.to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -182,6 +214,10 @@ impl SystemdCollector {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update the cache with all discovered service status information
|
||||||
|
if let Ok(mut state) = self.state.write() {
|
||||||
|
state.service_status_cache = status_cache;
|
||||||
|
}
|
||||||
|
|
||||||
Ok(services)
|
Ok(services)
|
||||||
}
|
}
|
||||||
@ -242,8 +278,24 @@ impl SystemdCollector {
|
|||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get service status using systemctl
|
/// Get service status from cache (if available) or fallback to systemctl
|
||||||
fn get_service_status(&self, service: &str) -> Result<(String, String)> {
|
fn get_service_status(&self, service: &str) -> Result<(String, String)> {
|
||||||
|
// Try to get status from cache first
|
||||||
|
if let Ok(state) = self.state.read() {
|
||||||
|
if let Some(cached_info) = state.service_status_cache.get(service) {
|
||||||
|
let active_status = cached_info.active_state.clone();
|
||||||
|
let detailed_info = format!(
|
||||||
|
"LoadState={}\nActiveState={}\nSubState={}",
|
||||||
|
cached_info.load_state,
|
||||||
|
cached_info.active_state,
|
||||||
|
cached_info.sub_state
|
||||||
|
);
|
||||||
|
return Ok((active_status, detailed_info));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to systemctl if not in cache (shouldn't happen during normal operation)
|
||||||
|
debug!("Service '{}' not found in cache, falling back to systemctl", service);
|
||||||
let output = Command::new("systemctl")
|
let output = Command::new("systemctl")
|
||||||
.arg("is-active")
|
.arg("is-active")
|
||||||
.arg(format!("{}.service", service))
|
.arg(format!("{}.service", service))
|
||||||
@ -253,10 +305,10 @@ impl SystemdCollector {
|
|||||||
|
|
||||||
// Get more detailed info
|
// Get more detailed info
|
||||||
let output = Command::new("systemctl")
|
let output = Command::new("systemctl")
|
||||||
.arg("show")
|
.arg("show")
|
||||||
.arg(format!("{}.service", service))
|
.arg(format!("{}.service", service))
|
||||||
.arg("--property=LoadState,ActiveState,SubState")
|
.arg("--property=LoadState,ActiveState,SubState")
|
||||||
.output()?;
|
.output()?;
|
||||||
|
|
||||||
let detailed_info = String::from_utf8(output.stdout)?;
|
let detailed_info = String::from_utf8(output.stdout)?;
|
||||||
Ok((active_status, detailed_info))
|
Ok((active_status, detailed_info))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user