Reduce all command timeouts to 2-3 seconds max
With 10-second host heartbeat timeout, all command timeouts must be significantly lower to ensure total collection time stays under 10 seconds. Changed timeouts: - smartctl: 10s → 3s (critical: multiple drives queried sequentially) - du: 5s → 2s - lsblk: 5s → 2s - systemctl list commands: 5s → 3s - systemctl show/is-active: 3s → 2s - docker commands: 5s → 3s - df, ip commands: 3s → 2s Total worst-case collection time now capped at more reasonable levels, preventing false host offline alerts from blocking operations.
This commit is contained in:
@@ -117,7 +117,7 @@ impl DiskCollector {
|
||||
let mut cmd = Command::new("lsblk");
|
||||
cmd.args(&["-rn", "-o", "NAME,MOUNTPOINT"]);
|
||||
|
||||
let output = run_command_with_timeout(cmd, 5).await
|
||||
let output = run_command_with_timeout(cmd, 2).await
|
||||
.map_err(|e| CollectorError::SystemRead {
|
||||
path: "block devices".to_string(),
|
||||
error: e.to_string(),
|
||||
@@ -427,7 +427,7 @@ impl DiskCollector {
|
||||
cmd.args(&["-a", &format!("/dev/{}", drive_name)]);
|
||||
}
|
||||
|
||||
let output = run_command_with_timeout(cmd, 10).await
|
||||
let output = run_command_with_timeout(cmd, 3).await
|
||||
.map_err(|e| CollectorError::SystemRead {
|
||||
path: format!("SMART data for {}", drive_name),
|
||||
error: e.to_string(),
|
||||
@@ -764,7 +764,7 @@ impl DiskCollector {
|
||||
fn get_drive_info_for_path(&self, path: &str) -> anyhow::Result<PoolDrive> {
|
||||
// Use lsblk to find the backing device with timeout
|
||||
let output = Command::new("timeout")
|
||||
.args(&["5", "lsblk", "-rn", "-o", "NAME,MOUNTPOINT"])
|
||||
.args(&["2", "lsblk", "-rn", "-o", "NAME,MOUNTPOINT"])
|
||||
.output()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to run lsblk: {}", e))?;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user