Optimize agent CPU usage with throttled service descriptions

This commit is contained in:
Christoffer Martinsson 2025-10-12 15:13:42 +02:00
parent 656d410a7a
commit c3dbaeead2
2 changed files with 51 additions and 28 deletions

View File

@ -209,3 +209,5 @@ Keys: [Enter] details [r]efresh [s]ort [f]ilter [q]uit
NEVER write that you have "successfully implemented" something or generate extensive summary text without first verifying with the user that the implementation is correct. This wastes tokens. Keep responses concise.
NEVER implement code without first getting explicit user agreement on the approach. Always ask for confirmation before proceeding with implementation.
NEVER mention Claude or automation in commit messages. Keep commit messages focused on the technical changes only.

View File

@ -32,11 +32,11 @@ impl ServiceCollector {
async fn get_service_status(&self, service: &str) -> Result<ServiceData, CollectorError> {
let timeout_duration = Duration::from_millis(self.timeout_ms);
// Get systemctl status
// Use more efficient systemctl command - just get the essential info
let status_output = timeout(
timeout_duration,
Command::new("systemctl")
.args(["show", service, "--property=ActiveState,SubState,MainPID"])
.args(["show", service, "--property=ActiveState,SubState,MainPID", "--no-pager"])
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.output(),
@ -83,11 +83,19 @@ impl ServiceCollector {
// Get memory quota from systemd if available
let memory_quota_mb = self.get_service_memory_limit(service).await.unwrap_or(0.0);
// Get disk usage for this service
let disk_used_gb = self.get_service_disk_usage(service).await.unwrap_or(0.0);
// Get disk usage for this service (only for running services)
let disk_used_gb = if matches!(status, ServiceStatus::Running) {
self.get_service_disk_usage(service).await.unwrap_or(0.0)
} else {
0.0
};
// Get service-specific description
let description = self.get_service_description(service).await;
// Get service-specific description (only for running services, and throttled)
let description = if matches!(status, ServiceStatus::Running) {
self.get_service_description_throttled(service).await
} else {
None
};
Ok(ServiceData {
name: service.to_string(),
@ -151,27 +159,15 @@ impl ServiceCollector {
}
async fn get_service_disk_usage(&self, service: &str) -> Result<f32, CollectorError> {
// For systemd services, check if they have private /var directories or specific data paths
// This is a simplified implementation - could be enhanced to check actual service-specific paths
// Common service data directories to check
let potential_paths = vec![
format!("/var/lib/{}", service),
format!("/var/cache/{}", service),
format!("/var/log/{}", service),
format!("/opt/{}", service),
format!("/srv/{}", service),
];
let mut total_usage = 0.0;
for path in potential_paths {
if let Ok(usage) = self.get_directory_size(&path).await {
total_usage += usage;
}
// Only check the most likely path to avoid multiple du calls
let primary_path = format!("/var/lib/{}", service);
// Use a quick check first - if directory doesn't exist, don't run du
if tokio::fs::metadata(&primary_path).await.is_err() {
return Ok(0.0);
}
Ok(total_usage)
self.get_directory_size(&primary_path).await
}
async fn get_directory_size(&self, path: &str) -> Result<f32, CollectorError> {
@ -205,7 +201,7 @@ impl ServiceCollector {
async fn get_service_memory_limit(&self, service: &str) -> Result<f32, CollectorError> {
let output = Command::new("systemctl")
.args(["show", service, "--property=MemoryMax"])
.args(["show", service, "--property=MemoryMax", "--no-pager"])
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.output()
@ -453,10 +449,34 @@ impl ServiceCollector {
}
}
async fn get_service_description_throttled(&self, service: &str) -> Option<String> {
// Simple time-based throttling - only run expensive descriptions every ~30 seconds
// Use a hash of the current time to spread out when different services get described
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs();
let service_hash = service.as_bytes().iter().fold(0u64, |acc, &b| {
acc.wrapping_mul(31).wrapping_add(b as u64)
});
// Each service gets its description updated every 30 seconds, but staggered
let update_interval = 30; // seconds
let service_offset = service_hash % update_interval;
if (now + service_offset) % update_interval == 0 {
self.get_service_description(service).await
} else {
None // Return None to indicate no new description this cycle
}
}
async fn get_service_description(&self, service: &str) -> Option<String> {
match service {
"sshd" | "ssh" => self.get_ssh_active_users().await,
"nginx" | "apache2" | "httpd" => self.get_web_server_connections().await,
"nginx" => self.get_web_server_connections().await, // Use same method for now
"apache2" | "httpd" => self.get_web_server_connections().await,
"docker" => self.get_docker_containers().await,
"postgresql" | "postgres" => self.get_postgres_connections().await,
"mysql" | "mariadb" => self.get_mysql_connections().await,
@ -507,6 +527,7 @@ impl ServiceCollector {
}
async fn get_web_server_connections(&self) -> Option<String> {
// Use simpler ss command with minimal output
let output = Command::new("ss")
.args(["-tn", "state", "established", "sport", ":80", "or", "sport", ":443"])
.stdout(Stdio::piped())