From c3dbaeead2f0a9751db21ca2839af54d82f31c77 Mon Sep 17 00:00:00 2001 From: Christoffer Martinsson Date: Sun, 12 Oct 2025 15:13:42 +0200 Subject: [PATCH] Optimize agent CPU usage with throttled service descriptions --- CLAUDE.md | 2 + agent/src/collectors/service.rs | 77 +++++++++++++++++++++------------ 2 files changed, 51 insertions(+), 28 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index bad4c2b..0a672c7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -209,3 +209,5 @@ Keys: [Enter] details [r]efresh [s]ort [f]ilter [q]uit NEVER write that you have "successfully implemented" something or generate extensive summary text without first verifying with the user that the implementation is correct. This wastes tokens. Keep responses concise. NEVER implement code without first getting explicit user agreement on the approach. Always ask for confirmation before proceeding with implementation. + +NEVER mention Claude or automation in commit messages. Keep commit messages focused on the technical changes only. 
diff --git a/agent/src/collectors/service.rs b/agent/src/collectors/service.rs index 083e2fd..0d483d6 100644 --- a/agent/src/collectors/service.rs +++ b/agent/src/collectors/service.rs @@ -32,11 +32,11 @@ impl ServiceCollector { async fn get_service_status(&self, service: &str) -> Result { let timeout_duration = Duration::from_millis(self.timeout_ms); - // Get systemctl status + // Query only the essential properties; --no-pager stops systemctl from piping output into a pager let status_output = timeout( timeout_duration, Command::new("systemctl") - .args(["show", service, "--property=ActiveState,SubState,MainPID"]) + .args(["show", service, "--property=ActiveState,SubState,MainPID", "--no-pager"]) .stdout(Stdio::piped()) .stderr(Stdio::piped()) .output(), @@ -83,11 +83,19 @@ impl ServiceCollector { // Get memory quota from systemd if available let memory_quota_mb = self.get_service_memory_limit(service).await.unwrap_or(0.0); - // Get disk usage for this service - let disk_used_gb = self.get_service_disk_usage(service).await.unwrap_or(0.0); + // Get disk usage for this service (only for running services) + let disk_used_gb = if matches!(status, ServiceStatus::Running) { + self.get_service_disk_usage(service).await.unwrap_or(0.0) + } else { + 0.0 + }; - // Get service-specific description - let description = self.get_service_description(service).await; + // Get service-specific description (only for running services, and throttled) + let description = if matches!(status, ServiceStatus::Running) { + self.get_service_description_throttled(service).await + } else { + None + }; Ok(ServiceData { name: service.to_string(), @@ -151,27 +159,15 @@ impl ServiceCollector { } async fn get_service_disk_usage(&self, service: &str) -> Result { - // For systemd services, check if they have private /var directories or specific data paths - // This is a simplified implementation - could be enhanced to check actual service-specific paths - - // Common service data directories to check - let potential_paths = 
vec![ - format!("/var/lib/{}", service), - format!("/var/cache/{}", service), - format!("/var/log/{}", service), - format!("/opt/{}", service), - format!("/srv/{}", service), - ]; - - let mut total_usage = 0.0; - - for path in potential_paths { - if let Ok(usage) = self.get_directory_size(&path).await { - total_usage += usage; - } + // Only check the most likely path to avoid multiple du calls + let primary_path = format!("/var/lib/{}", service); + + // Use a quick check first - if directory doesn't exist, don't run du + if tokio::fs::metadata(&primary_path).await.is_err() { + return Ok(0.0); } - - Ok(total_usage) + + self.get_directory_size(&primary_path).await } async fn get_directory_size(&self, path: &str) -> Result { @@ -205,7 +201,7 @@ impl ServiceCollector { async fn get_service_memory_limit(&self, service: &str) -> Result { let output = Command::new("systemctl") - .args(["show", service, "--property=MemoryMax"]) + .args(["show", service, "--property=MemoryMax", "--no-pager"]) .stdout(Stdio::piped()) .stderr(Stdio::piped()) .output() @@ -453,10 +449,34 @@ impl ServiceCollector { } } + async fn get_service_description_throttled(&self, service: &str) -> Option { + // Simple time-based throttling - only run expensive descriptions every ~30 seconds + // Use a hash of the service name as a time offset to spread out when different services get described + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs(); + + let service_hash = service.as_bytes().iter().fold(0u64, |acc, &b| { + acc.wrapping_mul(31).wrapping_add(b as u64) + }); + + // Each service gets its description updated every 30 seconds, but staggered + let update_interval = 30; // seconds + let service_offset = service_hash % update_interval; + + if (now + service_offset) % update_interval == 0 { + self.get_service_description(service).await + } else { + None // Return None to indicate no new description this cycle + } + } + async fn get_service_description(&self, 
service: &str) -> Option { match service { "sshd" | "ssh" => self.get_ssh_active_users().await, - "nginx" | "apache2" | "httpd" => self.get_web_server_connections().await, + "nginx" => self.get_web_server_connections().await, // Shares the generic web-server check with apache2/httpd for now + "apache2" | "httpd" => self.get_web_server_connections().await, "docker" => self.get_docker_containers().await, "postgresql" | "postgres" => self.get_postgres_connections().await, "mysql" | "mariadb" => self.get_mysql_connections().await, @@ -507,6 +527,7 @@ impl ServiceCollector { } async fn get_web_server_connections(&self) -> Option { + // Ask ss for established TCP connections whose source port is 80 or 443 (-n: numeric output) let output = Command::new("ss") .args(["-tn", "state", "established", "sport", ":80", "or", "sport", ":443"]) .stdout(Stdio::piped())