Fix blocking smartctl commands with proper async/timeout handling
All checks were successful
Build and Release / build-and-release (push) Successful in 2m2s
All checks were successful
Build and Release / build-and-release (push) Successful in 2m2s
- Changed disk collector to use tokio::process::Command instead of std::process::Command - Updated run_command_with_timeout to properly kill processes on timeout - Fixes issue where smartctl hangs on problematic drives (/dev/sda) freezing entire agent - Timeout now force-kills hung processes using kill -9, preventing orphaned smartctl processes This resolves the issue where Data_3 showed unknown status because smartctl was hanging indefinitely trying to read from a problematic drive, blocking the entire collector. Bump version to v0.1.220 Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
929870f8b6
commit
77bf08a978
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard"
|
name = "cm-dashboard"
|
||||||
version = "0.1.217"
|
version = "0.1.219"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"chrono",
|
"chrono",
|
||||||
@ -301,7 +301,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard-agent"
|
name = "cm-dashboard-agent"
|
||||||
version = "0.1.217"
|
version = "0.1.219"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
@ -324,7 +324,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard-shared"
|
name = "cm-dashboard-shared"
|
||||||
version = "0.1.217"
|
version = "0.1.219"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"chrono",
|
"chrono",
|
||||||
"serde",
|
"serde",
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard-agent"
|
name = "cm-dashboard-agent"
|
||||||
version = "0.1.219"
|
version = "0.1.220"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
@ -3,7 +3,7 @@ use async_trait::async_trait;
|
|||||||
use cm_dashboard_shared::{AgentData, DriveData, FilesystemData, PoolData, HysteresisThresholds, Status};
|
use cm_dashboard_shared::{AgentData, DriveData, FilesystemData, PoolData, HysteresisThresholds, Status};
|
||||||
|
|
||||||
use crate::config::DiskConfig;
|
use crate::config::DiskConfig;
|
||||||
use std::process::Command;
|
use tokio::process::Command;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
@ -763,7 +763,7 @@ impl DiskCollector {
|
|||||||
/// Get drive information for a mount path
|
/// Get drive information for a mount path
|
||||||
fn get_drive_info_for_path(&self, path: &str) -> anyhow::Result<PoolDrive> {
|
fn get_drive_info_for_path(&self, path: &str) -> anyhow::Result<PoolDrive> {
|
||||||
// Use lsblk to find the backing device with timeout
|
// Use lsblk to find the backing device with timeout
|
||||||
let output = Command::new("timeout")
|
let output = std::process::Command::new("timeout")
|
||||||
.args(&["2", "lsblk", "-rn", "-o", "NAME,MOUNTPOINT"])
|
.args(&["2", "lsblk", "-rn", "-o", "NAME,MOUNTPOINT"])
|
||||||
.output()
|
.output()
|
||||||
.map_err(|e| anyhow::anyhow!("Failed to run lsblk: {}", e))?;
|
.map_err(|e| anyhow::anyhow!("Failed to run lsblk: {}", e))?;
|
||||||
|
|||||||
@ -1,8 +1,7 @@
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use cm_dashboard_shared::{AgentData};
|
use cm_dashboard_shared::{AgentData};
|
||||||
use std::process::{Command, Output};
|
use std::process::Output;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use tokio::time::timeout;
|
|
||||||
|
|
||||||
pub mod backup;
|
pub mod backup;
|
||||||
pub mod cpu;
|
pub mod cpu;
|
||||||
@ -16,16 +15,29 @@ pub mod systemd;
|
|||||||
pub use error::CollectorError;
|
pub use error::CollectorError;
|
||||||
|
|
||||||
/// Run a command with a timeout to prevent blocking
|
/// Run a command with a timeout to prevent blocking
|
||||||
pub async fn run_command_with_timeout(mut cmd: Command, timeout_secs: u64) -> std::io::Result<Output> {
|
/// Properly kills the process if timeout is exceeded
|
||||||
|
pub async fn run_command_with_timeout(mut cmd: tokio::process::Command, timeout_secs: u64) -> std::io::Result<Output> {
|
||||||
|
use tokio::time::timeout;
|
||||||
let timeout_duration = Duration::from_secs(timeout_secs);
|
let timeout_duration = Duration::from_secs(timeout_secs);
|
||||||
|
|
||||||
match timeout(timeout_duration, tokio::task::spawn_blocking(move || cmd.output())).await {
|
let child = cmd.spawn()?;
|
||||||
Ok(Ok(result)) => result,
|
let pid = child.id();
|
||||||
Ok(Err(e)) => Err(std::io::Error::new(std::io::ErrorKind::Other, e)),
|
|
||||||
Err(_) => Err(std::io::Error::new(
|
match timeout(timeout_duration, child.wait_with_output()).await {
|
||||||
std::io::ErrorKind::TimedOut,
|
Ok(result) => result,
|
||||||
format!("Command timed out after {} seconds", timeout_secs)
|
Err(_) => {
|
||||||
)),
|
// Timeout - force kill the process using system kill command
|
||||||
|
if let Some(process_id) = pid {
|
||||||
|
let _ = tokio::process::Command::new("kill")
|
||||||
|
.args(&["-9", &process_id.to_string()])
|
||||||
|
.output()
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::TimedOut,
|
||||||
|
format!("Command timed out after {} seconds", timeout_secs)
|
||||||
|
))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard"
|
name = "cm-dashboard"
|
||||||
version = "0.1.219"
|
version = "0.1.220"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard-shared"
|
name = "cm-dashboard-shared"
|
||||||
version = "0.1.219"
|
version = "0.1.220"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user