Update version to 0.1.22 and fix system metric status calculation
All checks were successful
Build and Release / build-and-release (push) Successful in 1m11s

- Fix /tmp usage status to use proper thresholds instead of hardcoded Ok status
- Fix wear level status to use configurable thresholds instead of hardcoded values
- Add dedicated tmp_status field to SystemWidget for proper /tmp status display
- Remove host-level hourglass icon during service operations
- Implement immediate service status updates after start/stop/restart commands
- Remove active users display and collection from NixOS section
- Fix immediate host status aggregation transmission to dashboard
This commit is contained in:
2025-10-28 13:21:56 +01:00
parent 43242debce
commit 2910b7d875
12 changed files with 51 additions and 105 deletions

View File

@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-agent"
version = "0.1.21"
version = "0.1.22"
edition = "2021"
[dependencies]

View File

@@ -270,7 +270,7 @@ impl Agent {
}
/// Handle systemd service control commands
async fn handle_service_control(&self, service_name: &str, action: &ServiceAction) -> Result<()> {
async fn handle_service_control(&mut self, service_name: &str, action: &ServiceAction) -> Result<()> {
let action_str = match action {
ServiceAction::Start => "start",
ServiceAction::Stop => "stop",
@@ -300,9 +300,12 @@ impl Agent {
// Force refresh metrics after service control to update service status
if matches!(action, ServiceAction::Start | ServiceAction::Stop | ServiceAction::Restart) {
info!("Triggering metric refresh after service control");
// Note: We can't call self.collect_metrics_only() here due to borrowing issues
// The next metric collection cycle will pick up the changes
info!("Triggering immediate metric refresh after service control");
if let Err(e) = self.collect_metrics_only().await {
error!("Failed to refresh metrics after service control: {}", e);
} else {
info!("Service status refreshed immediately after {} {}", action_str, service_name);
}
}
Ok(())

View File

@@ -556,8 +556,8 @@ impl Collector for DiskCollector {
// Drive wear level (for SSDs)
if let Some(wear) = drive.wear_level {
let wear_status = if wear >= 90.0 { Status::Critical }
else if wear >= 80.0 { Status::Warning }
let wear_status = if wear >= self.config.wear_critical_percent { Status::Critical }
else if wear >= self.config.wear_warning_percent { Status::Warning }
else { Status::Ok };
metrics.push(Metric {

View File

@@ -187,7 +187,7 @@ impl MemoryCollector {
}
// Monitor tmpfs (/tmp) usage
if let Ok(tmpfs_metrics) = self.get_tmpfs_metrics() {
if let Ok(tmpfs_metrics) = self.get_tmpfs_metrics(status_tracker) {
metrics.extend(tmpfs_metrics);
}
@@ -195,7 +195,7 @@ impl MemoryCollector {
}
/// Get tmpfs (/tmp) usage metrics
fn get_tmpfs_metrics(&self) -> Result<Vec<Metric>, CollectorError> {
fn get_tmpfs_metrics(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
use std::process::Command;
let output = Command::new("df")
@@ -249,12 +249,15 @@ impl MemoryCollector {
let mut metrics = Vec::new();
let timestamp = chrono::Utc::now().timestamp() as u64;
// Calculate status using same thresholds as main memory
let tmp_status = self.calculate_usage_status("memory_tmp_usage_percent", usage_percent, status_tracker);
metrics.push(Metric {
name: "memory_tmp_usage_percent".to_string(),
value: MetricValue::Float(usage_percent),
unit: Some("%".to_string()),
description: Some("tmpfs /tmp usage percentage".to_string()),
status: Status::Ok,
status: tmp_status,
timestamp,
});

View File

@@ -10,7 +10,6 @@ use crate::config::NixOSConfig;
///
/// Collects NixOS-specific system information including:
/// - NixOS version and build information
/// - Currently active/logged in users
pub struct NixOSCollector {
}
@@ -65,27 +64,6 @@ impl NixOSCollector {
Err("Could not extract hash from nix store path".into())
}
/// Get currently active users
fn get_active_users(&self) -> Result<Vec<String>, Box<dyn std::error::Error>> {
let output = Command::new("who").output()?;
if !output.status.success() {
return Err("who command failed".into());
}
let who_output = String::from_utf8_lossy(&output.stdout);
let mut users = std::collections::HashSet::new();
for line in who_output.lines() {
if let Some(username) = line.split_whitespace().next() {
if !username.is_empty() {
users.insert(username.to_string());
}
}
}
Ok(users.into_iter().collect())
}
}
#[async_trait]
@@ -121,31 +99,6 @@ impl Collector for NixOSCollector {
}
}
// Collect active users
match self.get_active_users() {
Ok(users) => {
let users_str = users.join(", ");
metrics.push(Metric {
name: "system_active_users".to_string(),
value: MetricValue::String(users_str),
unit: None,
description: Some("Currently active users".to_string()),
status: Status::Ok,
timestamp,
});
}
Err(e) => {
debug!("Failed to get active users: {}", e);
metrics.push(Metric {
name: "system_active_users".to_string(),
value: MetricValue::String("unknown".to_string()),
unit: None,
description: Some("Active users (failed to detect)".to_string()),
status: Status::Unknown,
timestamp,
});
}
}
// Collect config hash
match self.get_config_hash() {

View File

@@ -160,27 +160,37 @@ impl HostStatusManager {
/// Process a metric - updates status and queues for aggregated notifications if status changed
pub async fn process_metric(&mut self, metric: &Metric, _notification_manager: &mut crate::notifications::NotificationManager) -> bool {
let old_status = self.service_statuses.get(&metric.name).copied();
let new_status = metric.status;
let old_service_status = self.service_statuses.get(&metric.name).copied();
let old_host_status = self.current_host_status;
let new_service_status = metric.status;
// Update status
self.update_service_status(metric.name.clone(), new_status);
// Update status (this recalculates host status internally)
self.update_service_status(metric.name.clone(), new_service_status);
// Check if status actually changed (ignore first-time status setting)
if let Some(old_status) = old_status {
if old_status != new_status {
debug!("Status change detected for {}: {:?} -> {:?}", metric.name, old_status, new_status);
let new_host_status = self.current_host_status;
let mut status_changed = false;
// Check if service status actually changed (ignore first-time status setting)
if let Some(old_service_status) = old_service_status {
if old_service_status != new_service_status {
debug!("Service status change detected for {}: {:?} -> {:?}", metric.name, old_service_status, new_service_status);
// Queue change for aggregated notification (not immediate)
self.queue_status_change(&metric.name, old_status, new_status);
self.queue_status_change(&metric.name, old_service_status, new_service_status);
return true; // Status changed - caller should trigger immediate transmission
status_changed = true;
}
} else {
debug!("Initial status set for {}: {:?}", metric.name, new_status);
debug!("Initial status set for {}: {:?}", metric.name, new_service_status);
}
false // No status change (or first-time status)
// Check if host status changed (this should trigger immediate transmission)
if old_host_status != new_host_status {
debug!("Host status change detected: {:?} -> {:?}", old_host_status, new_host_status);
status_changed = true;
}
status_changed // Return true if either service or host status changed
}
/// Queue status change for aggregated notification