Optimize agent CPU usage with throttled service descriptions

This commit is contained in:
2025-10-12 15:13:42 +02:00
parent 656d410a7a
commit c3dbaeead2
2 changed files with 51 additions and 28 deletions

View File

@@ -32,11 +32,11 @@ impl ServiceCollector {
async fn get_service_status(&self, service: &str) -> Result<ServiceData, CollectorError> {
let timeout_duration = Duration::from_millis(self.timeout_ms);
// Get systemctl status
// Use more efficient systemctl command - just get the essential info
let status_output = timeout(
timeout_duration,
Command::new("systemctl")
.args(["show", service, "--property=ActiveState,SubState,MainPID"])
.args(["show", service, "--property=ActiveState,SubState,MainPID", "--no-pager"])
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.output(),
@@ -83,11 +83,19 @@ impl ServiceCollector {
// Get memory quota from systemd if available
let memory_quota_mb = self.get_service_memory_limit(service).await.unwrap_or(0.0);
// Get disk usage for this service
let disk_used_gb = self.get_service_disk_usage(service).await.unwrap_or(0.0);
// Get disk usage for this service (only for running services)
let disk_used_gb = if matches!(status, ServiceStatus::Running) {
self.get_service_disk_usage(service).await.unwrap_or(0.0)
} else {
0.0
};
// Get service-specific description
let description = self.get_service_description(service).await;
// Get service-specific description (only for running services, and throttled)
let description = if matches!(status, ServiceStatus::Running) {
self.get_service_description_throttled(service).await
} else {
None
};
Ok(ServiceData {
name: service.to_string(),
@@ -151,27 +159,15 @@ impl ServiceCollector {
}
async fn get_service_disk_usage(&self, service: &str) -> Result<f32, CollectorError> {
// For systemd services, check if they have private /var directories or specific data paths
// This is a simplified implementation - could be enhanced to check actual service-specific paths
// Common service data directories to check
let potential_paths = vec![
format!("/var/lib/{}", service),
format!("/var/cache/{}", service),
format!("/var/log/{}", service),
format!("/opt/{}", service),
format!("/srv/{}", service),
];
let mut total_usage = 0.0;
for path in potential_paths {
if let Ok(usage) = self.get_directory_size(&path).await {
total_usage += usage;
}
// Only check the most likely path to avoid multiple du calls
let primary_path = format!("/var/lib/{}", service);
// Use a quick check first - if directory doesn't exist, don't run du
if tokio::fs::metadata(&primary_path).await.is_err() {
return Ok(0.0);
}
Ok(total_usage)
self.get_directory_size(&primary_path).await
}
async fn get_directory_size(&self, path: &str) -> Result<f32, CollectorError> {
@@ -205,7 +201,7 @@ impl ServiceCollector {
async fn get_service_memory_limit(&self, service: &str) -> Result<f32, CollectorError> {
let output = Command::new("systemctl")
.args(["show", service, "--property=MemoryMax"])
.args(["show", service, "--property=MemoryMax", "--no-pager"])
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.output()
@@ -453,10 +449,34 @@ impl ServiceCollector {
}
}
/// Returns a fresh service description at most once per 30-second slot.
///
/// Each service's slot boundaries are shifted by an offset derived from a
/// hash of its name, so different services refresh at different moments
/// rather than all in the same collection cycle.
///
/// The description is fetched on the *first* call observed inside each slot.
/// Comparing slot numbers (instead of requiring the call to land on one exact
/// second) keeps the throttle working when the collector polls less often
/// than once per second or when a poll is delayed by slow commands.
///
/// Returns `None` when no refresh is due this cycle, or when
/// `get_service_description` itself yields `None`.
async fn get_service_description_throttled(&self, service: &str) -> Option<String> {
    /// Length of one refresh slot, in seconds.
    const UPDATE_INTERVAL_SECS: u64 = 30;

    // Slot in which each service was last described. This is process-wide
    // state: NOTE(review) if several collectors ever watch the same service
    // name they will share one refresh budget - confirm that is acceptable.
    static LAST_SLOT: std::sync::Mutex<Option<std::collections::HashMap<String, u64>>> =
        std::sync::Mutex::new(None);

    // A clock set before the Unix epoch must not panic the collector;
    // fall back to 0, which simply pins every service to a single slot.
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|elapsed| elapsed.as_secs())
        .unwrap_or(0);

    // Cheap deterministic string hash (multiply-by-31) used only for staggering.
    let service_hash = service
        .bytes()
        .fold(0u64, |acc, b| acc.wrapping_mul(31).wrapping_add(u64::from(b)));
    let service_offset = service_hash % UPDATE_INTERVAL_SECS;
    let slot = now.saturating_add(service_offset) / UPDATE_INTERVAL_SECS;

    // Decide inside a tight scope so the mutex guard is released before the
    // `.await` below (a std guard must never be held across an await point).
    let due = {
        // The map only holds slot numbers, so a poisoned lock is safe to reuse.
        let mut guard = LAST_SLOT
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        let seen = guard.get_or_insert_with(std::collections::HashMap::new);
        match seen.get_mut(service) {
            Some(last) if *last == slot => false,
            Some(last) => {
                *last = slot;
                true
            }
            None => {
                seen.insert(service.to_string(), slot);
                true
            }
        }
    };

    if due {
        self.get_service_description(service).await
    } else {
        None // No new description this cycle
    }
}
async fn get_service_description(&self, service: &str) -> Option<String> {
match service {
"sshd" | "ssh" => self.get_ssh_active_users().await,
"nginx" | "apache2" | "httpd" => self.get_web_server_connections().await,
"nginx" => self.get_web_server_connections().await, // Use same method for now
"apache2" | "httpd" => self.get_web_server_connections().await,
"docker" => self.get_docker_containers().await,
"postgresql" | "postgres" => self.get_postgres_connections().await,
"mysql" | "mariadb" => self.get_mysql_connections().await,
@@ -507,6 +527,7 @@ impl ServiceCollector {
}
async fn get_web_server_connections(&self) -> Option<String> {
// Use simpler ss command with minimal output
let output = Command::new("ss")
.args(["-tn", "state", "established", "sport", ":80", "or", "sport", ":443"])
.stdout(Stdio::piped())