Update version to 0.1.22 and fix system metric status calculation
All checks were successful
Build and Release / build-and-release (push) Successful in 1m11s
- Fix /tmp usage status to use proper thresholds instead of hardcoded Ok status
- Fix wear level status to use configurable thresholds instead of hardcoded values
- Add dedicated tmp_status field to SystemWidget for proper /tmp status display
- Remove host-level hourglass icon during service operations
- Implement immediate service status updates after start/stop/restart commands
- Remove active users display and collection from NixOS section
- Fix immediate host status aggregation transmission to dashboard
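The dashboard-side half of the tmp_status bullet is not part of the excerpted diff below; a minimal sketch of what the dedicated field might look like (the real SystemWidget layout is an assumption):

    pub struct SystemWidget {
        // ...existing display fields...
        /// Dedicated status for the /tmp gauge, so the widget no longer
        /// reuses the overall memory status (hypothetical field placement).
        pub tmp_status: Status,
    }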
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-agent"
-version = "0.1.21"
+version = "0.1.22"
 edition = "2021"
 
 [dependencies]
@@ -270,7 +270,7 @@ impl Agent {
     }
 
     /// Handle systemd service control commands
-    async fn handle_service_control(&self, service_name: &str, action: &ServiceAction) -> Result<()> {
+    async fn handle_service_control(&mut self, service_name: &str, action: &ServiceAction) -> Result<()> {
         let action_str = match action {
             ServiceAction::Start => "start",
             ServiceAction::Stop => "stop",
@@ -300,9 +300,12 @@ impl Agent {
 
         // Force refresh metrics after service control to update service status
         if matches!(action, ServiceAction::Start | ServiceAction::Stop | ServiceAction::Restart) {
-            info!("Triggering metric refresh after service control");
-            // Note: We can't call self.collect_metrics_only() here due to borrowing issues
-            // The next metric collection cycle will pick up the changes
+            info!("Triggering immediate metric refresh after service control");
+            if let Err(e) = self.collect_metrics_only().await {
+                error!("Failed to refresh metrics after service control: {}", e);
+            } else {
+                info!("Service status refreshed immediately after {} {}", action_str, service_name);
+            }
         }
 
         Ok(())
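The removed comment blamed borrowing issues; the signature change in the previous hunk (&self to &mut self) is what makes this call legal, since collect_metrics_only presumably needs exclusive access to the agent's state. A sketch of the assumed shape:

    impl Agent {
        /// Assumed signature: collecting metrics mutates collector and
        /// cache state, so it takes &mut self, and so must any caller.
        async fn collect_metrics_only(&mut self) -> Result<()> {
            // ...run collectors, update cached statuses, transmit...
            Ok(())
        }
    }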
@@ -556,8 +556,8 @@ impl Collector for DiskCollector {
 
             // Drive wear level (for SSDs)
             if let Some(wear) = drive.wear_level {
-                let wear_status = if wear >= 90.0 { Status::Critical }
-                else if wear >= 80.0 { Status::Warning }
+                let wear_status = if wear >= self.config.wear_critical_percent { Status::Critical }
+                else if wear >= self.config.wear_warning_percent { Status::Warning }
                 else { Status::Ok };
 
                 metrics.push(Metric {
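The new wear check reads two threshold fields from the collector config; a plausible shape for that config, keeping the previously hardcoded values as defaults (field names come from the call sites, everything else is assumed):

    #[derive(Debug, Clone)]
    pub struct DiskConfig {
        /// Wear level (%) at or above which a drive is reported Critical.
        pub wear_critical_percent: f64, // previously hardcoded 90.0
        /// Wear level (%) at or above which a drive is reported Warning.
        pub wear_warning_percent: f64,  // previously hardcoded 80.0
    }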
@@ -187,7 +187,7 @@ impl MemoryCollector {
         }
 
         // Monitor tmpfs (/tmp) usage
-        if let Ok(tmpfs_metrics) = self.get_tmpfs_metrics() {
+        if let Ok(tmpfs_metrics) = self.get_tmpfs_metrics(status_tracker) {
             metrics.extend(tmpfs_metrics);
         }
 
@@ -195,7 +195,7 @@ impl MemoryCollector {
     }
 
     /// Get tmpfs (/tmp) usage metrics
-    fn get_tmpfs_metrics(&self) -> Result<Vec<Metric>, CollectorError> {
+    fn get_tmpfs_metrics(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
        use std::process::Command;
 
        let output = Command::new("df")
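The df invocation is cut off by the hunk boundary; one way a helper like this can read /tmp usage with GNU df (the flags and error mapping here are illustrative, not necessarily what the project uses):

    let output = Command::new("df")
        .args(["--output=pcent", "/tmp"]) // GNU coreutils: print only Use%
        .output()
        .map_err(|e| CollectorError::CommandFailed(e.to_string()))?; // error variant assumed
    let text = String::from_utf8_lossy(&output.stdout);
    let usage_percent: f64 = text
        .lines()
        .nth(1) // skip the "Use%" header row
        .and_then(|l| l.trim().trim_end_matches('%').parse().ok())
        .unwrap_or(0.0);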
@@ -249,12 +249,15 @@ impl MemoryCollector {
         let mut metrics = Vec::new();
         let timestamp = chrono::Utc::now().timestamp() as u64;
 
+        // Calculate status using same thresholds as main memory
+        let tmp_status = self.calculate_usage_status("memory_tmp_usage_percent", usage_percent, status_tracker);
+
         metrics.push(Metric {
             name: "memory_tmp_usage_percent".to_string(),
             value: MetricValue::Float(usage_percent),
             unit: Some("%".to_string()),
             description: Some("tmpfs /tmp usage percentage".to_string()),
-            status: Status::Ok,
+            status: tmp_status,
             timestamp,
         });
 
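calculate_usage_status itself is outside this diff; a minimal sketch of what such a helper might do, with the signature taken from the call site and the body, config fields, and StatusTracker method all assumed:

    fn calculate_usage_status(
        &self,
        metric_name: &str,
        usage_percent: f64,
        status_tracker: &mut StatusTracker,
    ) -> Status {
        let status = if usage_percent >= self.config.usage_critical_percent {
            Status::Critical
        } else if usage_percent >= self.config.usage_warning_percent {
            Status::Warning
        } else {
            Status::Ok
        };
        // A tracker like this can debounce flapping between states;
        // record() is a hypothetical method name.
        status_tracker.record(metric_name, status);
        status
    }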
@@ -10,7 +10,6 @@ use crate::config::NixOSConfig;
 ///
 /// Collects NixOS-specific system information including:
 /// - NixOS version and build information
-/// - Currently active/logged in users
 pub struct NixOSCollector {
 }
 
@@ -65,27 +64,6 @@ impl NixOSCollector {
         Err("Could not extract hash from nix store path".into())
     }
 
-    /// Get currently active users
-    fn get_active_users(&self) -> Result<Vec<String>, Box<dyn std::error::Error>> {
-        let output = Command::new("who").output()?;
-
-        if !output.status.success() {
-            return Err("who command failed".into());
-        }
-
-        let who_output = String::from_utf8_lossy(&output.stdout);
-        let mut users = std::collections::HashSet::new();
-
-        for line in who_output.lines() {
-            if let Some(username) = line.split_whitespace().next() {
-                if !username.is_empty() {
-                    users.insert(username.to_string());
-                }
-            }
-        }
-
-        Ok(users.into_iter().collect())
-    }
 }
 
 #[async_trait]
@@ -121,31 +99,6 @@ impl Collector for NixOSCollector {
             }
         }
 
-        // Collect active users
-        match self.get_active_users() {
-            Ok(users) => {
-                let users_str = users.join(", ");
-                metrics.push(Metric {
-                    name: "system_active_users".to_string(),
-                    value: MetricValue::String(users_str),
-                    unit: None,
-                    description: Some("Currently active users".to_string()),
-                    status: Status::Ok,
-                    timestamp,
-                });
-            }
-            Err(e) => {
-                debug!("Failed to get active users: {}", e);
-                metrics.push(Metric {
-                    name: "system_active_users".to_string(),
-                    value: MetricValue::String("unknown".to_string()),
-                    unit: None,
-                    description: Some("Active users (failed to detect)".to_string()),
-                    status: Status::Unknown,
-                    timestamp,
-                });
-            }
-        }
-
         // Collect config hash
         match self.get_config_hash() {
@@ -160,27 +160,37 @@ impl HostStatusManager {
 
     /// Process a metric - updates status and queues for aggregated notifications if status changed
     pub async fn process_metric(&mut self, metric: &Metric, _notification_manager: &mut crate::notifications::NotificationManager) -> bool {
-        let old_status = self.service_statuses.get(&metric.name).copied();
-        let new_status = metric.status;
+        let old_service_status = self.service_statuses.get(&metric.name).copied();
+        let old_host_status = self.current_host_status;
+        let new_service_status = metric.status;
 
-        // Update status
-        self.update_service_status(metric.name.clone(), new_status);
+        // Update status (this recalculates host status internally)
+        self.update_service_status(metric.name.clone(), new_service_status);
 
-        // Check if status actually changed (ignore first-time status setting)
-        if let Some(old_status) = old_status {
-            if old_status != new_status {
-                debug!("Status change detected for {}: {:?} -> {:?}", metric.name, old_status, new_status);
+        let new_host_status = self.current_host_status;
+        let mut status_changed = false;
+
+        // Check if service status actually changed (ignore first-time status setting)
+        if let Some(old_service_status) = old_service_status {
+            if old_service_status != new_service_status {
+                debug!("Service status change detected for {}: {:?} -> {:?}", metric.name, old_service_status, new_service_status);
 
                 // Queue change for aggregated notification (not immediate)
-                self.queue_status_change(&metric.name, old_status, new_status);
+                self.queue_status_change(&metric.name, old_service_status, new_service_status);
 
-                return true; // Status changed - caller should trigger immediate transmission
+                status_changed = true;
             }
         } else {
-            debug!("Initial status set for {}: {:?}", metric.name, new_status);
+            debug!("Initial status set for {}: {:?}", metric.name, new_service_status);
         }
 
-        false // No status change (or first-time status)
+        // Check if host status changed (this should trigger immediate transmission)
+        if old_host_status != new_host_status {
+            debug!("Host status change detected: {:?} -> {:?}", old_host_status, new_host_status);
+            status_changed = true;
+        }
+
+        status_changed // Return true if either service or host status changed
    }
 
     /// Queue status change for aggregated notification
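update_service_status is called above but not shown; its new comment says it recalculates host status internally, which suggests a worst-of aggregation over all tracked services. A sketch under that assumption (the Ord ordering on Status is assumed):

    fn update_service_status(&mut self, name: String, status: Status) {
        self.service_statuses.insert(name, status);
        // Host status = worst status of any service; assumes Status
        // derives Ord with Ok < Warning < Critical.
        self.current_host_status = self
            .service_statuses
            .values()
            .copied()
            .max()
            .unwrap_or(Status::Ok);
    }

With that shape, a single service flipping to Critical flips the host status in the same call, which is exactly what the hunk's old_host_status != new_host_status check relies on for immediate transmission.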