Compare commits

...

8 Commits

Author SHA1 Message Date
77bf08a978 Fix blocking smartctl commands with proper async/timeout handling
All checks were successful
Build and Release / build-and-release (push) Successful in 2m2s
- Changed disk collector to use tokio::process::Command instead of std::process::Command
- Updated run_command_with_timeout to properly kill processes on timeout
- Fixes an issue where smartctl hangs on problematic drives (/dev/sda), freezing the entire agent
- Timeout now force-kills hung processes using kill -9, preventing orphaned smartctl processes

This resolves the issue where Data_3 showed unknown status because smartctl was hanging
indefinitely trying to read from a problematic drive, blocking the entire collector.

Bump version to v0.1.220

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-29 21:09:04 +01:00
929870f8b6 Bump version to v0.1.219
All checks were successful
Build and Release / build-and-release (push) Successful in 1m11s
2025-11-29 18:35:14 +01:00
7aae852b7b Bump version to v0.1.218
All checks were successful
Build and Release / build-and-release (push) Successful in 1m19s
2025-11-29 17:59:33 +01:00
40f3ff66d8 Show archive count range to detect inconsistencies
- Display single number if all services have same count
- Display min-max range if counts differ (indicates problem)
2025-11-29 17:59:24 +01:00
1c1beddb55 Bump version to v0.1.217
All checks were successful
Build and Release / build-and-release (push) Successful in 1m20s
2025-11-29 17:51:13 +01:00
620d1f10b6 Show archive count per service instead of total sum
2025-11-29 17:51:01 +01:00
a0d571a40e Bump version to v0.1.216
All checks were successful
Build and Release / build-and-release (push) Successful in 1m19s
2025-11-29 17:44:12 +01:00
977200fff3 Move archive count to Usage line in backup display
2025-11-29 17:44:05 +01:00
9 changed files with 52 additions and 28 deletions

6
Cargo.lock generated
View File

@@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
[[package]]
name = "cm-dashboard"
version = "0.1.214"
version = "0.1.219"
dependencies = [
"anyhow",
"chrono",
@@ -301,7 +301,7 @@ dependencies = [
[[package]]
name = "cm-dashboard-agent"
version = "0.1.214"
version = "0.1.219"
dependencies = [
"anyhow",
"async-trait",
@@ -324,7 +324,7 @@ dependencies = [
[[package]]
name = "cm-dashboard-shared"
version = "0.1.214"
version = "0.1.219"
dependencies = [
"chrono",
"serde",

View File

@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-agent"
version = "0.1.215"
version = "0.1.220"
edition = "2021"
[dependencies]

View File

@@ -142,10 +142,16 @@ impl BackupCollector {
// Build service list for this disk
let services: Vec<String> = backup_status.services.keys().cloned().collect();
// Calculate total archives across all services on this disk
let total_archives: i64 = backup_status.services.values()
// Get min and max archive counts to detect inconsistencies
let archives_min: i64 = backup_status.services.values()
.map(|service| service.archive_count)
.sum();
.min()
.unwrap_or(0);
let archives_max: i64 = backup_status.services.values()
.map(|service| service.archive_count)
.max()
.unwrap_or(0);
// Create disk data
let disk_data = BackupDiskData {
@@ -160,7 +166,8 @@ impl BackupCollector {
disk_total_gb: total_gb,
usage_status,
services,
total_archives,
archives_min,
archives_max,
};
disks.push(disk_data);

View File

@@ -3,7 +3,7 @@ use async_trait::async_trait;
use cm_dashboard_shared::{AgentData, DriveData, FilesystemData, PoolData, HysteresisThresholds, Status};
use crate::config::DiskConfig;
use std::process::Command;
use tokio::process::Command;
use std::time::Instant;
use std::collections::HashMap;
use tracing::debug;
@@ -763,7 +763,7 @@ impl DiskCollector {
/// Get drive information for a mount path
fn get_drive_info_for_path(&self, path: &str) -> anyhow::Result<PoolDrive> {
// Use lsblk to find the backing device with timeout
let output = Command::new("timeout")
let output = std::process::Command::new("timeout")
.args(&["2", "lsblk", "-rn", "-o", "NAME,MOUNTPOINT"])
.output()
.map_err(|e| anyhow::anyhow!("Failed to run lsblk: {}", e))?;

View File

@@ -1,8 +1,7 @@
use async_trait::async_trait;
use cm_dashboard_shared::{AgentData};
use std::process::{Command, Output};
use std::process::Output;
use std::time::Duration;
use tokio::time::timeout;
pub mod backup;
pub mod cpu;
@@ -16,16 +15,29 @@ pub mod systemd;
pub use error::CollectorError;
/// Run a command with a timeout to prevent blocking
pub async fn run_command_with_timeout(mut cmd: Command, timeout_secs: u64) -> std::io::Result<Output> {
/// Properly kills the process if timeout is exceeded
pub async fn run_command_with_timeout(mut cmd: tokio::process::Command, timeout_secs: u64) -> std::io::Result<Output> {
use tokio::time::timeout;
let timeout_duration = Duration::from_secs(timeout_secs);
match timeout(timeout_duration, tokio::task::spawn_blocking(move || cmd.output())).await {
Ok(Ok(result)) => result,
Ok(Err(e)) => Err(std::io::Error::new(std::io::ErrorKind::Other, e)),
Err(_) => Err(std::io::Error::new(
std::io::ErrorKind::TimedOut,
format!("Command timed out after {} seconds", timeout_secs)
)),
let child = cmd.spawn()?;
let pid = child.id();
match timeout(timeout_duration, child.wait_with_output()).await {
Ok(result) => result,
Err(_) => {
// Timeout - force kill the process using system kill command
if let Some(process_id) = pid {
let _ = tokio::process::Command::new("kill")
.args(&["-9", &process_id.to_string()])
.output()
.await;
}
Err(std::io::Error::new(
std::io::ErrorKind::TimedOut,
format!("Command timed out after {} seconds", timeout_secs)
))
}
}
}

View File

@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard"
version = "0.1.215"
version = "0.1.220"
edition = "2021"
[dependencies]

View File

@@ -544,9 +544,6 @@ impl SystemWidget {
details.push(format!("W: {}%", wear as i32));
}
// Add archive count
details.push(format!("Archives: {}", disk.total_archives));
let disk_text = if !details.is_empty() {
format!("{} {}", truncated_serial, details.join(" "))
} else {
@@ -568,9 +565,16 @@ impl SystemWidget {
lines.push(Line::from(time_spans));
}
// Show usage with status
// Show usage with status and archive count
let archive_display = if disk.archives_min == disk.archives_max {
format!("{}", disk.archives_min)
} else {
format!("{}-{}", disk.archives_min, disk.archives_max)
};
let usage_text = format!(
"Usage: {:.0}% {:.0}GB/{:.0}GB",
"Usage: ({}) {:.0}% {:.0}GB/{:.0}GB",
archive_display,
disk.disk_usage_percent,
disk.disk_used_gb,
disk.disk_total_gb

View File

@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-shared"
version = "0.1.215"
version = "0.1.220"
edition = "2021"
[dependencies]

View File

@@ -195,7 +195,8 @@ pub struct BackupDiskData {
pub disk_total_gb: f32,
pub usage_status: Status,
pub services: Vec<String>,
pub total_archives: i64,
pub archives_min: i64,
pub archives_max: i64,
}
impl AgentData {