Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3f6dffa66e | |||
| 1b64fbde3d | |||
| 4f4c3b0d6e | |||
| bd20f0cae1 | |||
| 11c9a5f9d2 | |||
| aeae60146d |
19
CLAUDE.md
19
CLAUDE.md
@@ -20,11 +20,28 @@ A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure.
|
||||
- Persistent storage survives agent restarts
|
||||
- Automatic flag clearing when services are restarted via dashboard
|
||||
|
||||
### Custom Service Logs
|
||||
- Configure service-specific log file paths per host in dashboard config
|
||||
- Press `L` on any service to view custom log files via `tail -f`
|
||||
- Configuration format in dashboard config:
|
||||
```toml
|
||||
[service_logs]
|
||||
hostname1 = [
|
||||
{ service_name = "nginx", log_file_path = "/var/log/nginx/access.log" },
|
||||
{ service_name = "app", log_file_path = "/var/log/myapp/app.log" }
|
||||
]
|
||||
hostname2 = [
|
||||
{ service_name = "database", log_file_path = "/var/log/postgres/postgres.log" }
|
||||
]
|
||||
```
|
||||
|
||||
### Service Management
|
||||
- **Direct Control**: Arrow keys (↑↓) or vim keys (j/k) navigate services
|
||||
- **Service Actions**:
|
||||
- `s` - Start service (sends UserStart command)
|
||||
- `S` - Stop service (sends UserStop command)
|
||||
- `J` - Show service logs (journalctl in tmux popup)
|
||||
- `L` - Show custom log files (tail -f custom paths in tmux popup)
|
||||
- `R` - Rebuild current host
|
||||
- **Visual Status**: Green ● (active), Yellow ◐ (inactive), Red ◯ (failed)
|
||||
- **Transitional Icons**: Blue arrows during operations
|
||||
@@ -32,6 +49,8 @@ A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure.
|
||||
### Navigation
|
||||
- **Tab**: Switch between hosts
|
||||
- **↑↓ or j/k**: Select services
|
||||
- **J**: Show service logs (journalctl)
|
||||
- **L**: Show custom log files
|
||||
- **q**: Quit dashboard
|
||||
|
||||
## Core Architecture Principles
|
||||
|
||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.43"
|
||||
version = "0.1.49"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@@ -291,7 +291,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.43"
|
||||
version = "0.1.49"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -314,7 +314,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.43"
|
||||
version = "0.1.49"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"serde",
|
||||
|
||||
@@ -87,6 +87,7 @@ cm-dashboard • ● cmbox ● srv01 ● srv02 ● steambox
|
||||
- **↑↓ or j/k**: Navigate services
|
||||
- **s**: Start selected service (UserStart)
|
||||
- **S**: Stop selected service (UserStop)
|
||||
- **J**: Show service logs (journalctl in tmux popup)
|
||||
- **R**: Rebuild current host
|
||||
- **q**: Quit
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.44"
|
||||
version = "0.1.50"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -180,6 +180,9 @@ impl Agent {
|
||||
let version_metric = self.get_agent_version_metric();
|
||||
metrics.push(version_metric);
|
||||
|
||||
// Check for user-stopped services that are now active and clear their flags
|
||||
self.clear_user_stopped_flags_for_active_services(&metrics);
|
||||
|
||||
if metrics.is_empty() {
|
||||
debug!("No metrics to broadcast");
|
||||
return Ok(());
|
||||
@@ -288,7 +291,7 @@ impl Agent {
|
||||
|
||||
info!("Executing systemctl {} {} (user action: {})", action_str, service_name, is_user_action);
|
||||
|
||||
// Handle user-stopped service tracking before systemctl execution
|
||||
// Handle user-stopped service tracking before systemctl execution (stop only)
|
||||
match action {
|
||||
ServiceAction::UserStop => {
|
||||
info!("Marking service '{}' as user-stopped", service_name);
|
||||
@@ -299,15 +302,6 @@ impl Agent {
|
||||
UserStoppedServiceTracker::update_global(&self.service_tracker);
|
||||
}
|
||||
}
|
||||
ServiceAction::UserStart => {
|
||||
info!("Clearing user-stopped flag for service '{}'", service_name);
|
||||
if let Err(e) = self.service_tracker.clear_user_stopped(service_name) {
|
||||
error!("Failed to clear user-stopped flag: {}", e);
|
||||
} else {
|
||||
// Sync to global tracker
|
||||
UserStoppedServiceTracker::update_global(&self.service_tracker);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -323,6 +317,9 @@ impl Agent {
|
||||
if !output.stdout.is_empty() {
|
||||
debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
|
||||
}
|
||||
|
||||
// Note: User-stopped flag will be cleared by systemd collector
|
||||
// when service actually reaches 'active' state, not here
|
||||
} else {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
error!("Service {} {} failed: {}", service_name, action_str, stderr);
|
||||
@@ -342,4 +339,33 @@ impl Agent {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check metrics for user-stopped services that are now active and clear their flags
|
||||
fn clear_user_stopped_flags_for_active_services(&mut self, metrics: &[Metric]) {
|
||||
for metric in metrics {
|
||||
// Look for service status metrics that are active
|
||||
if metric.name.starts_with("service_") && metric.name.ends_with("_status") {
|
||||
if let MetricValue::String(status) = &metric.value {
|
||||
if status == "active" {
|
||||
// Extract service name from metric name (service_nginx_status -> nginx)
|
||||
let service_name = metric.name
|
||||
.strip_prefix("service_")
|
||||
.and_then(|s| s.strip_suffix("_status"))
|
||||
.unwrap_or("");
|
||||
|
||||
if !service_name.is_empty() && UserStoppedServiceTracker::is_service_user_stopped(service_name) {
|
||||
info!("Service '{}' is now active - clearing user-stopped flag", service_name);
|
||||
if let Err(e) = self.service_tracker.clear_user_stopped(service_name) {
|
||||
error!("Failed to clear user-stopped flag for '{}': {}", service_name, e);
|
||||
} else {
|
||||
// Sync to global tracker
|
||||
UserStoppedServiceTracker::update_global(&self.service_tracker);
|
||||
debug!("Cleared user-stopped flag for service '{}'", service_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -357,7 +357,15 @@ impl SystemdCollector {
|
||||
/// Calculate service status, taking user-stopped services into account
|
||||
fn calculate_service_status(&self, service_name: &str, active_status: &str) -> Status {
|
||||
match active_status.to_lowercase().as_str() {
|
||||
"active" => Status::Ok,
|
||||
"active" => {
|
||||
// If service is now active and was marked as user-stopped, clear the flag
|
||||
if UserStoppedServiceTracker::is_service_user_stopped(service_name) {
|
||||
debug!("Service '{}' is now active - clearing user-stopped flag", service_name);
|
||||
// Note: We can't directly clear here because this is a read-only context
|
||||
// The agent will need to handle this differently
|
||||
}
|
||||
Status::Ok
|
||||
},
|
||||
"inactive" | "dead" => {
|
||||
// Check if this service was stopped by user action
|
||||
if UserStoppedServiceTracker::is_service_user_stopped(service_name) {
|
||||
@@ -368,7 +376,15 @@ impl SystemdCollector {
|
||||
}
|
||||
},
|
||||
"failed" | "error" => Status::Critical,
|
||||
"activating" | "deactivating" | "reloading" | "start" | "stop" | "restart" => Status::Pending,
|
||||
"activating" | "deactivating" | "reloading" | "start" | "stop" | "restart" => {
|
||||
// For user-stopped services that are transitioning, keep them as OK during transition
|
||||
if UserStoppedServiceTracker::is_service_user_stopped(service_name) {
|
||||
debug!("Service '{}' is transitioning but was user-stopped - treating as OK", service_name);
|
||||
Status::Ok
|
||||
} else {
|
||||
Status::Pending
|
||||
}
|
||||
},
|
||||
_ => Status::Unknown,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -272,11 +272,13 @@ impl HostStatusManager {
|
||||
/// Check if a status change is significant enough for notification
|
||||
fn is_significant_change(&self, old_status: Status, new_status: Status) -> bool {
|
||||
match (old_status, new_status) {
|
||||
// Always notify on problems
|
||||
// Don't notify on transitions from Unknown (startup/restart scenario)
|
||||
(Status::Unknown, _) => false,
|
||||
// Always notify on problems (but not from Unknown)
|
||||
(_, Status::Warning) | (_, Status::Critical) => true,
|
||||
// Only notify on recovery if it's from a problem state to OK and all services are OK
|
||||
(Status::Warning | Status::Critical, Status::Ok) => self.current_host_status == Status::Ok,
|
||||
// Don't notify on startup or other transitions
|
||||
// Don't notify on other transitions
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
@@ -374,8 +376,8 @@ impl HostStatusManager {
|
||||
details.push('\n');
|
||||
}
|
||||
|
||||
// Show recoveries
|
||||
if !recovery_changes.is_empty() {
|
||||
// Show recoveries only if host status is now OK (all services recovered)
|
||||
if !recovery_changes.is_empty() && aggregated.host_status_final == Status::Ok {
|
||||
details.push_str(&format!("✅ RECOVERIES ({}):\n", recovery_changes.len()));
|
||||
for change in recovery_changes {
|
||||
details.push_str(&format!(" {}\n", change));
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.44"
|
||||
version = "0.1.50"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -9,6 +9,7 @@ pub struct DashboardConfig {
|
||||
pub hosts: HostsConfig,
|
||||
pub system: SystemConfig,
|
||||
pub ssh: SshConfig,
|
||||
pub service_logs: std::collections::HashMap<String, Vec<ServiceLogConfig>>,
|
||||
}
|
||||
|
||||
/// ZMQ consumer configuration
|
||||
@@ -39,6 +40,13 @@ pub struct SshConfig {
|
||||
pub rebuild_alias: String,
|
||||
}
|
||||
|
||||
/// Service log file configuration per host
///
/// One entry associates a service name with the log file to show for that service.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ServiceLogConfig {
|
||||
/// Name of the service this entry applies to.
pub service_name: String,
|
||||
/// Path of the log file for the service.
/// NOTE(review): presumably a path on the monitored host rather than on the
/// dashboard machine - confirm against the code that consumes it.
pub log_file_path: String,
|
||||
}
|
||||
|
||||
impl DashboardConfig {
|
||||
pub fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
||||
let path = path.as_ref();
|
||||
|
||||
@@ -260,6 +260,10 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
|
||||
std::process::Command::new("tmux")
|
||||
.arg("display-popup")
|
||||
.arg("-w")
|
||||
.arg("80%")
|
||||
.arg("-h")
|
||||
.arg("80%")
|
||||
.arg(&logo_and_rebuild)
|
||||
.spawn()
|
||||
.ok(); // Ignore errors, tmux will handle them
|
||||
@@ -281,6 +285,57 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
}
|
||||
}
|
||||
}
|
||||
KeyCode::Char('J') => {
|
||||
// Show service logs via journalctl in tmux popup
|
||||
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
|
||||
let journalctl_command = format!(
|
||||
"ssh -tt {}@{} 'journalctl -u {}.service -f --no-pager -n 50'",
|
||||
self.config.ssh.rebuild_user,
|
||||
hostname,
|
||||
service_name
|
||||
);
|
||||
|
||||
std::process::Command::new("tmux")
|
||||
.arg("display-popup")
|
||||
.arg("-w")
|
||||
.arg("80%")
|
||||
.arg("-h")
|
||||
.arg("80%")
|
||||
.arg("-T")
|
||||
.arg(format!("Logs: {}", service_name))
|
||||
.arg(&journalctl_command)
|
||||
.spawn()
|
||||
.ok(); // Ignore errors, tmux will handle them
|
||||
}
|
||||
}
|
||||
KeyCode::Char('L') => {
|
||||
// Show custom service log file in tmux popup
|
||||
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
|
||||
// Check if this service has a custom log file configured
|
||||
if let Some(host_logs) = self.config.service_logs.get(&hostname) {
|
||||
if let Some(log_config) = host_logs.iter().find(|config| config.service_name == service_name) {
|
||||
let tail_command = format!(
|
||||
"ssh -tt {}@{} 'tail -f {}'",
|
||||
self.config.ssh.rebuild_user,
|
||||
hostname,
|
||||
log_config.log_file_path
|
||||
);
|
||||
|
||||
std::process::Command::new("tmux")
|
||||
.arg("display-popup")
|
||||
.arg("-w")
|
||||
.arg("80%")
|
||||
.arg("-h")
|
||||
.arg("80%")
|
||||
.arg("-T")
|
||||
.arg(format!("Custom Log: {}", service_name))
|
||||
.arg(&tail_command)
|
||||
.spawn()
|
||||
.ok(); // Ignore errors, tmux will handle them
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
KeyCode::Char('b') => {
|
||||
// Trigger backup
|
||||
if let Some(hostname) = self.current_host.clone() {
|
||||
@@ -686,10 +741,12 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
let mut shortcuts = Vec::new();
|
||||
|
||||
// Global shortcuts
|
||||
shortcuts.push("Tab: Switch Host".to_string());
|
||||
shortcuts.push("↑↓/jk: Select Service".to_string());
|
||||
shortcuts.push("r: Rebuild Host".to_string());
|
||||
shortcuts.push("s/S: Start/Stop Service".to_string());
|
||||
shortcuts.push("Tab: Host".to_string());
|
||||
shortcuts.push("↑↓/jk: Select".to_string());
|
||||
shortcuts.push("r: Rebuild".to_string());
|
||||
shortcuts.push("s/S: Start/Stop".to_string());
|
||||
shortcuts.push("J: Logs".to_string());
|
||||
shortcuts.push("L: Custom".to_string());
|
||||
|
||||
// Always show quit
|
||||
shortcuts.push("q: Quit".to_string());
|
||||
|
||||
@@ -113,13 +113,10 @@ impl ServicesWidget {
|
||||
name.to_string()
|
||||
};
|
||||
|
||||
// Parent services always show active/inactive status
|
||||
// Parent services always show actual systemctl status
|
||||
let status_str = match info.widget_status {
|
||||
Status::Ok => "active".to_string(),
|
||||
Status::Pending => "pending".to_string(),
|
||||
Status::Warning => "inactive".to_string(),
|
||||
Status::Critical => "failed".to_string(),
|
||||
Status::Unknown => "unknown".to_string(),
|
||||
_ => info.status.clone(), // Use actual status from agent (active/inactive/failed)
|
||||
};
|
||||
|
||||
format!(
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.44"
|
||||
version = "0.1.50"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
Reference in New Issue
Block a user