Compare commits

..

6 Commits

Author SHA1 Message Date
ad1fcaa27b Fix collector interval timing to prevent excessive SMART checks
All checks were successful
Build and Release / build-and-release (push) Successful in 1m46s
Collectors now respect their configured intervals instead of running
every transmission cycle (2s). This prevents disk SMART checks from
running every 2 seconds, which was causing constant disk activity.

- Add TimedCollector wrapper with interval tracking
- Only collect from collectors whose interval has elapsed
- Disk collector now properly runs every 300s instead of every 2s
- Bump version to v0.1.229
2025-12-01 13:03:45 +01:00
60ab4d4f9e Fix service panel column width calculation
All checks were successful
Build and Release / build-and-release (push) Successful in 1m10s
Replace hardcoded terminal width thresholds with dynamic calculation
based on actual column requirements. Column visibility now adapts
correctly at 58, 52, 43, and 34 character widths instead of the
previous arbitrary 80, 60, 45 thresholds.

- Add width constants for each column (NAME=23, STATUS=10, etc)
- Calculate cumulative widths dynamically for each layout tier
- Ensure header and data formatting use consistent width values
- Fix service name truncation to respect calculated column width
2025-11-30 12:09:44 +01:00
67034c84b9 Add responsive column visibility to service panel
All checks were successful
Build and Release / build-and-release (push) Successful in 1m47s
Service panel now dynamically shows/hides columns based on terminal width:
- ≥80 chars: All columns (Name, Status, RAM, Uptime, Restarts)
- ≥60 chars: Hide Restarts only
- ≥45 chars: Hide Uptime and Restarts
- <45 chars: Minimal (Name and Status only)

Improves dashboard usability on smaller terminal sizes.
2025-11-30 10:50:08 +01:00
c62c7fa698 Remove debug logging from disk collector
All checks were successful
Build and Release / build-and-release (push) Successful in 1m11s
Removed all debug! statements from disk collector to reduce log noise.

Bump version to v0.1.226
2025-11-30 00:44:38 +01:00
0b1d8c0a73 Fix Data_3 showing as unknown by handling smartctl warning exit codes
All checks were successful
Build and Release / build-and-release (push) Successful in 1m11s
Root cause: sda's temperature exceeded threshold in the past, causing
smartctl to return exit code 32 (warning: "Attributes have been <= threshold
in the past"). The agent checked output.status.success() and rejected the
entire output as failed, even though the data (serial, temperature, health)
was perfectly valid.

Smartctl exit codes are bit flags for informational warnings:
- Exit 0: No warnings
- Exit 32 (bit 5): Attributes were at/below threshold in past
- Exit 64 (bit 6): Error log has entries
- etc.

The output data is valid regardless of these warning flags.

Solution: Parse the output as long as it's not empty and ignore the exit code.
Only return UNKNOWN if output is actually empty (command truly failed).

Result: Data_3 will now show "ZDZ4VE0B T: 31°C" instead of "? Data_3: sda"

Bump version to v0.1.225
2025-11-30 00:35:19 +01:00
c77aa6eaaa Fix Data_3 timeout by removing sequential SMART during pool detection
All checks were successful
Build and Release / build-and-release (push) Successful in 1m34s
Root cause: SMART data was collected TWICE:
1. Sequential collection during pool detection in get_drive_info_for_path()
   using problematic tokio::task::block_in_place() nesting
2. Parallel collection in get_smart_data_for_drives() (v0.1.223)

The sequential collection happened FIRST during pool detection, causing
sda (Data_3) to time out due to:
- Bad async nesting: block_in_place() wrapping block_on()
- Sequential execution causing runtime issues
- sda being third in sequence, runtime degraded by then

Solution: Remove SMART collection from get_drive_info_for_path().
Pool drive temperatures are populated later from the parallel SMART
collection which properly uses futures::join_all.

Benefits:
- Eliminates problematic async nesting
- All SMART queries happen once in parallel only
- sda/Data_3 should now show serial (ZDZ4VE0B) and temperature

Bump version to v0.1.224
2025-11-30 00:14:25 +01:00
7 changed files with 225 additions and 76 deletions

6
Cargo.lock generated
View File

@@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
[[package]] [[package]]
name = "cm-dashboard" name = "cm-dashboard"
version = "0.1.222" version = "0.1.229"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"chrono", "chrono",
@@ -301,7 +301,7 @@ dependencies = [
[[package]] [[package]]
name = "cm-dashboard-agent" name = "cm-dashboard-agent"
version = "0.1.222" version = "0.1.229"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-trait", "async-trait",
@@ -325,7 +325,7 @@ dependencies = [
[[package]] [[package]]
name = "cm-dashboard-shared" name = "cm-dashboard-shared"
version = "0.1.222" version = "0.1.229"
dependencies = [ dependencies = [
"chrono", "chrono",
"serde", "serde",

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "cm-dashboard-agent" name = "cm-dashboard-agent"
version = "0.1.223" version = "0.1.229"
edition = "2021" edition = "2021"
[dependencies] [dependencies]

View File

@@ -1,6 +1,6 @@
use anyhow::Result; use anyhow::Result;
use gethostname::gethostname; use gethostname::gethostname;
use std::time::Duration; use std::time::{Duration, Instant};
use tokio::time::interval; use tokio::time::interval;
use tracing::{debug, error, info}; use tracing::{debug, error, info};
@@ -19,11 +19,19 @@ use crate::collectors::{
use crate::notifications::NotificationManager; use crate::notifications::NotificationManager;
use cm_dashboard_shared::AgentData; use cm_dashboard_shared::AgentData;
/// Wrapper for collectors with timing information
struct TimedCollector {
    // The underlying collector implementation (CPU, memory, disk, ...).
    collector: Box<dyn Collector>,
    // Minimum time that must elapse between two collection runs,
    // built from the collector's configured interval_seconds.
    interval: Duration,
    // When this collector last ran; None until the first collection.
    last_collection: Option<Instant>,
    // Human-readable name used in log/error messages.
    name: String,
}
pub struct Agent { pub struct Agent {
hostname: String, hostname: String,
config: AgentConfig, config: AgentConfig,
zmq_handler: ZmqHandler, zmq_handler: ZmqHandler,
collectors: Vec<Box<dyn Collector>>, collectors: Vec<TimedCollector>,
notification_manager: NotificationManager, notification_manager: NotificationManager,
previous_status: Option<SystemStatus>, previous_status: Option<SystemStatus>,
} }
@@ -55,36 +63,78 @@ impl Agent {
config.zmq.publisher_port config.zmq.publisher_port
); );
// Initialize collectors // Initialize collectors with timing information
let mut collectors: Vec<Box<dyn Collector>> = Vec::new(); let mut collectors: Vec<TimedCollector> = Vec::new();
// Add enabled collectors // Add enabled collectors
if config.collectors.cpu.enabled { if config.collectors.cpu.enabled {
collectors.push(Box::new(CpuCollector::new(config.collectors.cpu.clone()))); collectors.push(TimedCollector {
collector: Box::new(CpuCollector::new(config.collectors.cpu.clone())),
interval: Duration::from_secs(config.collectors.cpu.interval_seconds),
last_collection: None,
name: "CPU".to_string(),
});
info!("CPU collector initialized with {}s interval", config.collectors.cpu.interval_seconds);
} }
if config.collectors.memory.enabled { if config.collectors.memory.enabled {
collectors.push(Box::new(MemoryCollector::new(config.collectors.memory.clone()))); collectors.push(TimedCollector {
collector: Box::new(MemoryCollector::new(config.collectors.memory.clone())),
interval: Duration::from_secs(config.collectors.memory.interval_seconds),
last_collection: None,
name: "Memory".to_string(),
});
info!("Memory collector initialized with {}s interval", config.collectors.memory.interval_seconds);
} }
if config.collectors.disk.enabled { if config.collectors.disk.enabled {
collectors.push(Box::new(DiskCollector::new(config.collectors.disk.clone()))); collectors.push(TimedCollector {
collector: Box::new(DiskCollector::new(config.collectors.disk.clone())),
interval: Duration::from_secs(config.collectors.disk.interval_seconds),
last_collection: None,
name: "Disk".to_string(),
});
info!("Disk collector initialized with {}s interval", config.collectors.disk.interval_seconds);
} }
if config.collectors.systemd.enabled { if config.collectors.systemd.enabled {
collectors.push(Box::new(SystemdCollector::new(config.collectors.systemd.clone()))); collectors.push(TimedCollector {
collector: Box::new(SystemdCollector::new(config.collectors.systemd.clone())),
interval: Duration::from_secs(config.collectors.systemd.interval_seconds),
last_collection: None,
name: "Systemd".to_string(),
});
info!("Systemd collector initialized with {}s interval", config.collectors.systemd.interval_seconds);
} }
if config.collectors.backup.enabled { if config.collectors.backup.enabled {
collectors.push(Box::new(BackupCollector::new())); collectors.push(TimedCollector {
collector: Box::new(BackupCollector::new()),
interval: Duration::from_secs(config.collectors.backup.interval_seconds),
last_collection: None,
name: "Backup".to_string(),
});
info!("Backup collector initialized with {}s interval", config.collectors.backup.interval_seconds);
} }
if config.collectors.network.enabled { if config.collectors.network.enabled {
collectors.push(Box::new(NetworkCollector::new(config.collectors.network.clone()))); collectors.push(TimedCollector {
collector: Box::new(NetworkCollector::new(config.collectors.network.clone())),
interval: Duration::from_secs(config.collectors.network.interval_seconds),
last_collection: None,
name: "Network".to_string(),
});
info!("Network collector initialized with {}s interval", config.collectors.network.interval_seconds);
} }
if config.collectors.nixos.enabled { if config.collectors.nixos.enabled {
collectors.push(Box::new(NixOSCollector::new(config.collectors.nixos.clone()))); collectors.push(TimedCollector {
collector: Box::new(NixOSCollector::new(config.collectors.nixos.clone())),
interval: Duration::from_secs(config.collectors.nixos.interval_seconds),
last_collection: None,
name: "NixOS".to_string(),
});
info!("NixOS collector initialized with {}s interval", config.collectors.nixos.interval_seconds);
} }
info!("Initialized {} collectors", collectors.len()); info!("Initialized {} collectors", collectors.len());
@@ -152,11 +202,27 @@ impl Agent {
// Initialize empty AgentData // Initialize empty AgentData
let mut agent_data = AgentData::new(self.hostname.clone(), env!("CARGO_PKG_VERSION").to_string()); let mut agent_data = AgentData::new(self.hostname.clone(), env!("CARGO_PKG_VERSION").to_string());
// Collect data from all collectors // Collect data from collectors whose intervals have elapsed
for collector in &self.collectors { let now = Instant::now();
if let Err(e) = collector.collect_structured(&mut agent_data).await { for timed_collector in &mut self.collectors {
error!("Collector failed: {}", e); let should_collect = match timed_collector.last_collection {
// Continue with other collectors even if one fails None => true, // First collection
Some(last_time) => now.duration_since(last_time) >= timed_collector.interval,
};
if should_collect {
if let Err(e) = timed_collector.collector.collect_structured(&mut agent_data).await {
error!("Collector {} failed: {}", timed_collector.name, e);
// Update last_collection time even on failure to prevent immediate retries
timed_collector.last_collection = Some(now);
} else {
timed_collector.last_collection = Some(now);
debug!(
"Collected from {} ({}s interval)",
timed_collector.name,
timed_collector.interval.as_secs()
);
}
} }
} }

View File

@@ -5,9 +5,7 @@ use cm_dashboard_shared::{AgentData, DriveData, FilesystemData, PoolData, Hyster
use crate::config::DiskConfig; use crate::config::DiskConfig;
use tokio::process::Command as TokioCommand; use tokio::process::Command as TokioCommand;
use std::process::Command as StdCommand; use std::process::Command as StdCommand;
use std::time::Instant;
use std::collections::HashMap; use std::collections::HashMap;
use tracing::debug;
use super::{Collector, CollectorError}; use super::{Collector, CollectorError};
@@ -68,9 +66,6 @@ impl DiskCollector {
/// Collect all storage data and populate AgentData /// Collect all storage data and populate AgentData
async fn collect_storage_data(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> { async fn collect_storage_data(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
let start_time = Instant::now();
debug!("Starting clean storage collection");
// Step 1: Get mount points and their backing devices // Step 1: Get mount points and their backing devices
let mount_devices = self.get_mount_devices().await?; let mount_devices = self.get_mount_devices().await?;
@@ -105,9 +100,6 @@ impl DiskCollector {
self.populate_drives_data(&physical_drives, &smart_data, agent_data)?; self.populate_drives_data(&physical_drives, &smart_data, agent_data)?;
self.populate_pools_data(&mergerfs_pools, &smart_data, agent_data)?; self.populate_pools_data(&mergerfs_pools, &smart_data, agent_data)?;
let elapsed = start_time.elapsed();
debug!("Storage collection completed in {:?}", elapsed);
Ok(()) Ok(())
} }
@@ -142,7 +134,6 @@ impl DiskCollector {
} }
} }
debug!("Found {} mounted block devices", mount_devices.len());
Ok(mount_devices) Ok(mount_devices)
} }
@@ -155,8 +146,8 @@ impl DiskCollector {
Ok((total, used)) => { Ok((total, used)) => {
filesystem_usage.insert(mount_point.clone(), (total, used)); filesystem_usage.insert(mount_point.clone(), (total, used));
} }
Err(e) => { Err(_e) => {
debug!("Failed to get filesystem info for {}: {}", mount_point, e); // Silently skip filesystems we can't read
} }
} }
} }
@@ -177,8 +168,6 @@ impl DiskCollector {
// Only add if we don't already have usage data for this mount point // Only add if we don't already have usage data for this mount point
if !filesystem_usage.contains_key(&mount_point) { if !filesystem_usage.contains_key(&mount_point) {
if let Ok((total, used)) = self.get_filesystem_info(&mount_point) { if let Ok((total, used)) = self.get_filesystem_info(&mount_point) {
debug!("Added MergerFS filesystem usage for {}: {}GB total, {}GB used",
mount_point, total as f32 / (1024.0 * 1024.0 * 1024.0), used as f32 / (1024.0 * 1024.0 * 1024.0));
filesystem_usage.insert(mount_point, (total, used)); filesystem_usage.insert(mount_point, (total, used));
} }
} }
@@ -253,9 +242,8 @@ impl DiskCollector {
} else { } else {
mount_point.trim_start_matches('/').replace('/', "_") mount_point.trim_start_matches('/').replace('/', "_")
}; };
if pool_name.is_empty() { if pool_name.is_empty() {
debug!("Skipping mergerfs pool with empty name: {}", mount_point);
continue; continue;
} }
@@ -283,8 +271,7 @@ impl DiskCollector {
// Categorize as data vs parity drives // Categorize as data vs parity drives
let (data_drives, parity_drives) = match self.categorize_pool_drives(&all_member_paths) { let (data_drives, parity_drives) = match self.categorize_pool_drives(&all_member_paths) {
Ok(drives) => drives, Ok(drives) => drives,
Err(e) => { Err(_e) => {
debug!("Failed to categorize drives for pool {}: {}. Skipping.", mount_point, e);
continue; continue;
} }
}; };
@@ -299,8 +286,7 @@ impl DiskCollector {
}); });
} }
} }
debug!("Found {} mergerfs pools", pools.len());
Ok(pools) Ok(pools)
} }
@@ -451,8 +437,10 @@ impl DiskCollector {
let output_str = String::from_utf8_lossy(&output.stdout); let output_str = String::from_utf8_lossy(&output.stdout);
if !output.status.success() { // Note: smartctl returns non-zero exit codes for warnings (like exit code 32
// Return unknown data rather than failing completely // for "temperature was high in the past"), but the output data is still valid.
// Only check if we got any output at all, don't reject based on exit code.
if output_str.is_empty() {
return Ok(SmartData { return Ok(SmartData {
health: "UNKNOWN".to_string(), health: "UNKNOWN".to_string(),
serial_number: None, serial_number: None,
@@ -460,7 +448,7 @@ impl DiskCollector {
wear_percent: None, wear_percent: None,
}); });
} }
let mut health = "UNKNOWN".to_string(); let mut health = "UNKNOWN".to_string();
let mut serial_number = None; let mut serial_number = None;
let mut temperature = None; let mut temperature = None;
@@ -801,20 +789,13 @@ impl DiskCollector {
// Extract base device name (e.g., "sda1" -> "sda") // Extract base device name (e.g., "sda1" -> "sda")
let base_device = self.extract_base_device(&format!("/dev/{}", device)); let base_device = self.extract_base_device(&format!("/dev/{}", device));
// Get temperature from SMART data if available // Temperature will be filled in later from parallel SMART collection
let temperature = if let Ok(smart_data) = tokio::task::block_in_place(|| { // Don't collect it here to avoid sequential blocking with problematic async nesting
tokio::runtime::Handle::current().block_on(self.get_smart_data(&base_device))
}) {
smart_data.temperature_celsius
} else {
None
};
Ok(PoolDrive { Ok(PoolDrive {
name: base_device, name: base_device,
mount_point: path.to_string(), mount_point: path.to_string(),
temperature_celsius: temperature, temperature_celsius: None,
}) })
} }

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "cm-dashboard" name = "cm-dashboard"
version = "0.1.223" version = "0.1.229"
edition = "2021" edition = "2021"
[dependencies] [dependencies]

View File

@@ -11,6 +11,74 @@ use tracing::debug;
use crate::ui::theme::{Components, StatusIcons, Theme, Typography}; use crate::ui::theme::{Components, StatusIcons, Theme, Typography};
use ratatui::style::Style; use ratatui::style::Style;
/// Column visibility configuration based on terminal width
#[derive(Debug, Clone, Copy)]
struct ColumnVisibility {
    show_name: bool,
    show_status: bool,
    show_ram: bool,
    show_uptime: bool,
    show_restarts: bool,
}

impl ColumnVisibility {
    // Width (in characters) required by each individual column.
    const NAME_WIDTH: u16 = 23;
    const STATUS_WIDTH: u16 = 10;
    const RAM_WIDTH: u16 = 8;
    const UPTIME_WIDTH: u16 = 8;
    const RESTARTS_WIDTH: u16 = 5;
    const COLUMN_SPACING: u16 = 1; // Space between columns

    /// Determine which columns to show based on available width
    /// Priority order: Name > Status > RAM > Uptime > Restarts
    fn from_width(width: u16) -> Self {
        // Cumulative width needed for each successive layout tier.
        let minimal = Self::NAME_WIDTH + Self::COLUMN_SPACING + Self::STATUS_WIDTH; // 34
        let with_ram = minimal + Self::COLUMN_SPACING + Self::RAM_WIDTH; // 43
        let with_uptime = with_ram + Self::COLUMN_SPACING + Self::UPTIME_WIDTH; // 52
        let full = with_uptime + Self::COLUMN_SPACING + Self::RESTARTS_WIDTH; // 58

        // Name and Status are always shown; each remaining column switches
        // on once the terminal is wide enough for its tier. Because the
        // tier thresholds are strictly increasing (34 < 43 < 52 < 58),
        // these independent comparisons reproduce the tiered if/else chain.
        Self {
            show_name: true,
            show_status: true,
            show_ram: width >= with_ram,
            show_uptime: width >= with_uptime,
            show_restarts: width >= full,
        }
    }
}
/// Services widget displaying hierarchical systemd service statuses /// Services widget displaying hierarchical systemd service statuses
#[derive(Clone)] #[derive(Clone)]
pub struct ServicesWidget { pub struct ServicesWidget {
@@ -76,10 +144,12 @@ impl ServicesWidget {
} }
/// Format parent service line - returns text without icon for span formatting /// Format parent service line - returns text without icon for span formatting
fn format_parent_service_line(&self, name: &str, info: &ServiceInfo) -> String { fn format_parent_service_line(&self, name: &str, info: &ServiceInfo, columns: ColumnVisibility) -> String {
// Truncate long service names to fit layout (account for icon space) // Truncate long service names to fit layout
let short_name = if name.len() > 22 { // NAME_WIDTH - 3 chars for "..." = max displayable chars
format!("{}...", &name[..19]) let max_name_len = (ColumnVisibility::NAME_WIDTH - 3) as usize;
let short_name = if name.len() > max_name_len {
format!("{}...", &name[..max_name_len.saturating_sub(3)])
} else { } else {
name.to_string() name.to_string()
}; };
@@ -129,10 +199,25 @@ impl ServicesWidget {
} }
}); });
format!( // Build format string based on column visibility
"{:<23} {:<10} {:<8} {:<8} {:<5}", let mut parts = Vec::new();
short_name, status_str, memory_str, uptime_str, restart_str if columns.show_name {
) parts.push(format!("{:<width$}", short_name, width = ColumnVisibility::NAME_WIDTH as usize));
}
if columns.show_status {
parts.push(format!("{:<width$}", status_str, width = ColumnVisibility::STATUS_WIDTH as usize));
}
if columns.show_ram {
parts.push(format!("{:<width$}", memory_str, width = ColumnVisibility::RAM_WIDTH as usize));
}
if columns.show_uptime {
parts.push(format!("{:<width$}", uptime_str, width = ColumnVisibility::UPTIME_WIDTH as usize));
}
if columns.show_restarts {
parts.push(format!("{:<width$}", restart_str, width = ColumnVisibility::RESTARTS_WIDTH as usize));
}
parts.join(" ")
} }
@@ -476,11 +561,28 @@ impl ServicesWidget {
.constraints([Constraint::Length(1), Constraint::Min(0)]) .constraints([Constraint::Length(1), Constraint::Min(0)])
.split(inner_area); .split(inner_area);
// Header // Determine which columns to show based on available width
let header = format!( let columns = ColumnVisibility::from_width(inner_area.width);
"{:<25} {:<10} {:<8} {:<8} {:<5}",
"Service:", "Status:", "RAM:", "Uptime:", "↻:" // Build header based on visible columns
); let mut header_parts = Vec::new();
if columns.show_name {
header_parts.push(format!("{:<width$}", "Service:", width = ColumnVisibility::NAME_WIDTH as usize));
}
if columns.show_status {
header_parts.push(format!("{:<width$}", "Status:", width = ColumnVisibility::STATUS_WIDTH as usize));
}
if columns.show_ram {
header_parts.push(format!("{:<width$}", "RAM:", width = ColumnVisibility::RAM_WIDTH as usize));
}
if columns.show_uptime {
header_parts.push(format!("{:<width$}", "Uptime:", width = ColumnVisibility::UPTIME_WIDTH as usize));
}
if columns.show_restarts {
header_parts.push(format!("{:<width$}", "↻:", width = ColumnVisibility::RESTARTS_WIDTH as usize));
}
let header = header_parts.join(" ");
let header_para = Paragraph::new(header).style(Typography::muted()); let header_para = Paragraph::new(header).style(Typography::muted());
frame.render_widget(header_para, content_chunks[0]); frame.render_widget(header_para, content_chunks[0]);
@@ -492,11 +594,11 @@ impl ServicesWidget {
} }
// Render the services list // Render the services list
self.render_services(frame, content_chunks[1], is_focused); self.render_services(frame, content_chunks[1], is_focused, columns);
} }
/// Render services list /// Render services list
fn render_services(&mut self, frame: &mut Frame, area: Rect, is_focused: bool) { fn render_services(&mut self, frame: &mut Frame, area: Rect, is_focused: bool, columns: ColumnVisibility) {
// Build hierarchical service list for display // Build hierarchical service list for display
let mut display_lines: Vec<(String, Status, bool, Option<(ServiceInfo, bool)>)> = Vec::new(); let mut display_lines: Vec<(String, Status, bool, Option<(ServiceInfo, bool)>)> = Vec::new();
@@ -506,7 +608,7 @@ impl ServicesWidget {
for (parent_name, parent_info) in parent_services { for (parent_name, parent_info) in parent_services {
// Add parent service line // Add parent service line
let parent_line = self.format_parent_service_line(parent_name, parent_info); let parent_line = self.format_parent_service_line(parent_name, parent_info, columns);
display_lines.push((parent_line, parent_info.widget_status, false, None)); display_lines.push((parent_line, parent_info.widget_status, false, None));
// Add sub-services for this parent (if any) // Add sub-services for this parent (if any)

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "cm-dashboard-shared" name = "cm-dashboard-shared"
version = "0.1.223" version = "0.1.229"
edition = "2021" edition = "2021"
[dependencies] [dependencies]