Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4f4c3b0d6e | |||
| bd20f0cae1 |
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.45"
|
||||
version = "0.1.47"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@@ -291,7 +291,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.45"
|
||||
version = "0.1.47"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -314,7 +314,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.45"
|
||||
version = "0.1.47"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"serde",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.46"
|
||||
version = "0.1.48"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -180,6 +180,9 @@ impl Agent {
|
||||
let version_metric = self.get_agent_version_metric();
|
||||
metrics.push(version_metric);
|
||||
|
||||
// Check for user-stopped services that are now active and clear their flags
|
||||
self.clear_user_stopped_flags_for_active_services(&metrics);
|
||||
|
||||
if metrics.is_empty() {
|
||||
debug!("No metrics to broadcast");
|
||||
return Ok(());
|
||||
@@ -315,16 +318,8 @@ impl Agent {
|
||||
debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
|
||||
}
|
||||
|
||||
// Clear user-stopped flag AFTER successful start command
|
||||
if matches!(action, ServiceAction::UserStart) {
|
||||
info!("Clearing user-stopped flag for service '{}' after successful start", service_name);
|
||||
if let Err(e) = self.service_tracker.clear_user_stopped(service_name) {
|
||||
error!("Failed to clear user-stopped flag: {}", e);
|
||||
} else {
|
||||
// Sync to global tracker
|
||||
UserStoppedServiceTracker::update_global(&self.service_tracker);
|
||||
}
|
||||
}
|
||||
// Note: User-stopped flag will be cleared by systemd collector
|
||||
// when service actually reaches 'active' state, not here
|
||||
} else {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
error!("Service {} {} failed: {}", service_name, action_str, stderr);
|
||||
@@ -344,4 +339,33 @@ impl Agent {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check metrics for user-stopped services that are now active and clear their flags
|
||||
fn clear_user_stopped_flags_for_active_services(&mut self, metrics: &[Metric]) {
|
||||
for metric in metrics {
|
||||
// Look for service status metrics that are active
|
||||
if metric.name.starts_with("service_") && metric.name.ends_with("_status") {
|
||||
if let MetricValue::String(status) = &metric.value {
|
||||
if status == "active" {
|
||||
// Extract service name from metric name (service_nginx_status -> nginx)
|
||||
let service_name = metric.name
|
||||
.strip_prefix("service_")
|
||||
.and_then(|s| s.strip_suffix("_status"))
|
||||
.unwrap_or("");
|
||||
|
||||
if !service_name.is_empty() && UserStoppedServiceTracker::is_service_user_stopped(service_name) {
|
||||
info!("Service '{}' is now active - clearing user-stopped flag", service_name);
|
||||
if let Err(e) = self.service_tracker.clear_user_stopped(service_name) {
|
||||
error!("Failed to clear user-stopped flag for '{}': {}", service_name, e);
|
||||
} else {
|
||||
// Sync to global tracker
|
||||
UserStoppedServiceTracker::update_global(&self.service_tracker);
|
||||
debug!("Cleared user-stopped flag for service '{}'", service_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -357,7 +357,15 @@ impl SystemdCollector {
|
||||
/// Calculate service status, taking user-stopped services into account
|
||||
fn calculate_service_status(&self, service_name: &str, active_status: &str) -> Status {
|
||||
match active_status.to_lowercase().as_str() {
|
||||
"active" => Status::Ok,
|
||||
"active" => {
|
||||
// If service is now active and was marked as user-stopped, clear the flag
|
||||
if UserStoppedServiceTracker::is_service_user_stopped(service_name) {
|
||||
debug!("Service '{}' is now active - clearing user-stopped flag", service_name);
|
||||
// Note: We can't directly clear here because this is a read-only context
|
||||
// The agent will need to handle this differently
|
||||
}
|
||||
Status::Ok
|
||||
},
|
||||
"inactive" | "dead" => {
|
||||
// Check if this service was stopped by user action
|
||||
if UserStoppedServiceTracker::is_service_user_stopped(service_name) {
|
||||
@@ -368,7 +376,15 @@ impl SystemdCollector {
|
||||
}
|
||||
},
|
||||
"failed" | "error" => Status::Critical,
|
||||
"activating" | "deactivating" | "reloading" | "start" | "stop" | "restart" => Status::Pending,
|
||||
"activating" | "deactivating" | "reloading" | "start" | "stop" | "restart" => {
|
||||
// For user-stopped services that are transitioning, keep them as OK during transition
|
||||
if UserStoppedServiceTracker::is_service_user_stopped(service_name) {
|
||||
debug!("Service '{}' is transitioning but was user-stopped - treating as OK", service_name);
|
||||
Status::Ok
|
||||
} else {
|
||||
Status::Pending
|
||||
}
|
||||
},
|
||||
_ => Status::Unknown,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -272,11 +272,13 @@ impl HostStatusManager {
|
||||
/// Check if a status change is significant enough for notification
|
||||
fn is_significant_change(&self, old_status: Status, new_status: Status) -> bool {
|
||||
match (old_status, new_status) {
|
||||
// Always notify on problems
|
||||
// Don't notify on transitions from Unknown (startup/restart scenario)
|
||||
(Status::Unknown, _) => false,
|
||||
// Always notify on problems (but not from Unknown)
|
||||
(_, Status::Warning) | (_, Status::Critical) => true,
|
||||
// Only notify on recovery if it's from a problem state to OK and all services are OK
|
||||
(Status::Warning | Status::Critical, Status::Ok) => self.current_host_status == Status::Ok,
|
||||
// Don't notify on startup or other transitions
|
||||
// Don't notify on other transitions
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
@@ -374,8 +376,8 @@ impl HostStatusManager {
|
||||
details.push('\n');
|
||||
}
|
||||
|
||||
// Show recoveries
|
||||
if !recovery_changes.is_empty() {
|
||||
// Show recoveries only if host status is now OK (all services recovered)
|
||||
if !recovery_changes.is_empty() && aggregated.host_status_final == Status::Ok {
|
||||
details.push_str(&format!("✅ RECOVERIES ({}):\n", recovery_changes.len()));
|
||||
for change in recovery_changes {
|
||||
details.push_str(&format!(" {}\n", change));
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.46"
|
||||
version = "0.1.48"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -302,7 +302,7 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
.arg("-h")
|
||||
.arg("80%")
|
||||
.arg("-t")
|
||||
.arg(format!("Logs: {}.service", service_name))
|
||||
.arg(format!("Logs: {}", service_name))
|
||||
.arg(&journalctl_command)
|
||||
.spawn()
|
||||
.ok(); // Ignore errors, tmux will handle them
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.46"
|
||||
version = "0.1.48"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
Reference in New Issue
Block a user