Compare commits
18 Commits

| SHA1 |
|---|
| e8e50ef9bb |
| 0faed9309e |
| c980346d05 |
| 3e3d3f0c2b |
| 9eb7444d56 |
| 278d1763aa |
| f874264e13 |
| 5f6e47ece5 |
| 0e7cf24dbb |
| 2d080a2f51 |
| 6179bd51a7 |
| 57de4c366a |
| e18778e962 |
| e4469a0ebf |
| 6fedf4c7fc |
| 3f6dffa66e |
| 1b64fbde3d |
| 4f4c3b0d6e |
CLAUDE.md (17 changes)

````diff
@@ -20,12 +20,28 @@ A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure.
 - Persistent storage survives agent restarts
 - Automatic flag clearing when services are restarted via dashboard
 
+### Custom Service Logs
+
+- Configure service-specific log file paths per host in dashboard config
+- Press `L` on any service to view custom log files via `tail -f`
+- Configuration format in dashboard config:
+```toml
+[service_logs]
+hostname1 = [
+  { service_name = "nginx", log_file_path = "/var/log/nginx/access.log" },
+  { service_name = "app", log_file_path = "/var/log/myapp/app.log" }
+]
+hostname2 = [
+  { service_name = "database", log_file_path = "/var/log/postgres/postgres.log" }
+]
+```
+
 ### Service Management
 - **Direct Control**: Arrow keys (↑↓) or vim keys (j/k) navigate services
 - **Service Actions**:
   - `s` - Start service (sends UserStart command)
   - `S` - Stop service (sends UserStop command)
   - `J` - Show service logs (journalctl in tmux popup)
+  - `L` - Show custom log files (tail -f custom paths in tmux popup)
   - `R` - Rebuild current host
 - **Visual Status**: Green ● (active), Yellow ◐ (inactive), Red ◯ (failed)
 - **Transitional Icons**: Blue arrows during operations
@@ -34,6 +50,7 @@ A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure.
 - **Tab**: Switch between hosts
 - **↑↓ or j/k**: Select services
 - **J**: Show service logs (journalctl)
+- **L**: Show custom log files
 - **q**: Quit dashboard
 
 ## Core Architecture Principles
````
Cargo.lock (13 changes, generated)

```diff
@@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
 
 [[package]]
 name = "cm-dashboard"
-version = "0.1.46"
+version = "0.1.63"
 dependencies = [
  "anyhow",
  "chrono",
@@ -286,12 +286,13 @@ dependencies = [
  "toml",
  "tracing",
  "tracing-subscriber",
+ "wake-on-lan",
  "zmq",
 ]
 
 [[package]]
 name = "cm-dashboard-agent"
-version = "0.1.46"
+version = "0.1.63"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -314,7 +315,7 @@ dependencies = [
 
 [[package]]
 name = "cm-dashboard-shared"
-version = "0.1.46"
+version = "0.1.63"
 dependencies = [
  "chrono",
  "serde",
@@ -2064,6 +2065,12 @@ version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 
+[[package]]
+name = "wake-on-lan"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ccf60b60ad7e5b1b37372c5134cbcab4db0706c231d212e0c643a077462bc8f"
+
 [[package]]
 name = "walkdir"
 version = "2.5.0"
```
Cargo.toml (package `cm-dashboard-agent`)

```diff
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-agent"
-version = "0.1.47"
+version = "0.1.64"
 edition = "2021"
 
 [dependencies]
```
```diff
@@ -78,10 +78,11 @@ impl Agent {
             info!("Initial metric collection completed - all data cached and ready");
         }
 
-        // Separate intervals for collection, transmission, and email notifications
+        // Separate intervals for collection, transmission, heartbeat, and email notifications
         let mut collection_interval =
             interval(Duration::from_secs(self.config.collection_interval_seconds));
         let mut transmission_interval = interval(Duration::from_secs(self.config.zmq.transmission_interval_seconds));
+        let mut heartbeat_interval = interval(Duration::from_secs(self.config.zmq.heartbeat_interval_seconds));
         let mut notification_interval = interval(Duration::from_secs(self.config.notifications.aggregation_interval_seconds));
 
         loop {
@@ -98,6 +99,12 @@ impl Agent {
                         error!("Failed to broadcast metrics: {}", e);
                     }
                 }
+                _ = heartbeat_interval.tick() => {
+                    // Send standalone heartbeat for host connectivity detection
+                    if let Err(e) = self.send_heartbeat().await {
+                        error!("Failed to send heartbeat: {}", e);
+                    }
+                }
                 _ = notification_interval.tick() => {
                     // Process batched email notifications (separate from dashboard updates)
                     if let Err(e) = self.host_status_manager.process_pending_notifications(&mut self.notification_manager).await {
@@ -180,6 +187,10 @@ impl Agent {
         let version_metric = self.get_agent_version_metric();
         metrics.push(version_metric);
 
+        // Add heartbeat metric for host connectivity detection
+        let heartbeat_metric = self.get_heartbeat_metric();
+        metrics.push(heartbeat_metric);
+
         // Check for user-stopped services that are now active and clear their flags
         self.clear_user_stopped_flags_for_active_services(&metrics);
 
@@ -201,6 +212,12 @@ impl Agent {
     async fn process_metrics(&mut self, metrics: &[Metric]) -> bool {
         let mut status_changed = false;
         for metric in metrics {
+            // Filter excluded metrics from email notification processing only
+            if self.config.notifications.exclude_email_metrics.contains(&metric.name) {
+                debug!("Excluding metric '{}' from email notification processing", metric.name);
+                continue;
+            }
+
             if self.host_status_manager.process_metric(metric, &mut self.notification_manager).await {
                 status_changed = true;
             }
@@ -226,6 +243,35 @@ impl Agent {
         format!("v{}", env!("CARGO_PKG_VERSION"))
     }
 
+    /// Create heartbeat metric for host connectivity detection
+    fn get_heartbeat_metric(&self) -> Metric {
+        use std::time::{SystemTime, UNIX_EPOCH};
+
+        let timestamp = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap()
+            .as_secs();
+
+        Metric::new(
+            "agent_heartbeat".to_string(),
+            MetricValue::Integer(timestamp as i64),
+            Status::Ok,
+        )
+    }
+
+    /// Send standalone heartbeat for connectivity detection
+    async fn send_heartbeat(&mut self) -> Result<()> {
+        let heartbeat_metric = self.get_heartbeat_metric();
+        let message = MetricMessage::new(
+            self.hostname.clone(),
+            vec![heartbeat_metric],
+        );
+
+        self.zmq_handler.publish_metrics(&message).await?;
+        debug!("Sent standalone heartbeat for connectivity detection");
+        Ok(())
+    }
+
     async fn handle_commands(&mut self) -> Result<()> {
         // Try to receive commands (non-blocking)
         match self.zmq_handler.try_receive_command() {
```
```diff
@@ -140,6 +140,7 @@ impl Collector for BackupCollector {
                 Status::Warning => "warning".to_string(),
                 Status::Critical => "critical".to_string(),
                 Status::Unknown => "unknown".to_string(),
+                Status::Offline => "offline".to_string(),
             }),
             status: overall_status,
             timestamp,
@@ -202,6 +203,7 @@ impl Collector for BackupCollector {
                 Status::Warning => "warning".to_string(),
                 Status::Critical => "critical".to_string(),
                 Status::Unknown => "unknown".to_string(),
+                Status::Offline => "offline".to_string(),
             }),
             status: service_status,
             timestamp,
```
```diff
@@ -66,8 +66,6 @@ impl ZmqHandler {
     }
 
 
-    /// Send heartbeat (placeholder for future use)
-
     /// Try to receive a command (non-blocking)
     pub fn try_receive_command(&self) -> Result<Option<AgentCommand>> {
         match self.command_receiver.recv_bytes(zmq::DONTWAIT) {
```
```diff
@@ -25,9 +25,10 @@ pub struct ZmqConfig {
     pub publisher_port: u16,
     pub command_port: u16,
     pub bind_address: String,
-    pub timeout_ms: u64,
-    pub heartbeat_interval_ms: u64,
     pub transmission_interval_seconds: u64,
+    /// Heartbeat transmission interval in seconds for host connectivity detection
+    #[serde(default = "default_heartbeat_interval_seconds")]
+    pub heartbeat_interval_seconds: u64,
 }
 
 /// Collector configuration
@@ -146,9 +147,16 @@ pub struct NotificationConfig {
     pub rate_limit_minutes: u64,
     /// Email notification batching interval in seconds (default: 60)
     pub aggregation_interval_seconds: u64,
+    /// List of metric names to exclude from email notifications
+    #[serde(default)]
+    pub exclude_email_metrics: Vec<String>,
 }
 
 
+fn default_heartbeat_interval_seconds() -> u64 {
+    5
+}
+
 impl AgentConfig {
     pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
         loader::load_config(path)
```
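Taken together with the struct definitions above, the agent now reads its heartbeat cadence from `zmq.heartbeat_interval_seconds` (serde default: 5 seconds) and an optional `notifications.exclude_email_metrics` list (default: empty). A minimal sketch of the corresponding agent config fragment; the key names come from the structs in this diff, while the concrete values and the choice of excluded metric are illustrative assumptions only:

```toml
# Illustrative agent configuration fragment (values are assumptions, not a real deployment)
[zmq]
publisher_port = 5555
command_port = 5556
bind_address = "0.0.0.0"
transmission_interval_seconds = 5
# Optional: standalone heartbeat cadence; falls back to 5 seconds when omitted
heartbeat_interval_seconds = 5

[notifications]
rate_limit_minutes = 15
aggregation_interval_seconds = 60
# Optional: metric names that should never trigger email notifications
exclude_email_metrics = ["agent_heartbeat"]
```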
```diff
@@ -19,10 +19,6 @@ pub fn validate_config(config: &AgentConfig) -> Result<()> {
         bail!("ZMQ bind address cannot be empty");
     }
 
-    if config.zmq.timeout_ms == 0 {
-        bail!("ZMQ timeout cannot be 0");
-    }
-
     // Validate collection interval
     if config.collection_interval_seconds == 0 {
         bail!("Collection interval cannot be 0");
```
```diff
@@ -272,11 +272,13 @@ impl HostStatusManager {
     /// Check if a status change is significant enough for notification
     fn is_significant_change(&self, old_status: Status, new_status: Status) -> bool {
         match (old_status, new_status) {
-            // Always notify on problems
+            // Don't notify on transitions from Unknown (startup/restart scenario)
+            (Status::Unknown, _) => false,
+            // Always notify on problems (but not from Unknown)
            (_, Status::Warning) | (_, Status::Critical) => true,
             // Only notify on recovery if it's from a problem state to OK and all services are OK
             (Status::Warning | Status::Critical, Status::Ok) => self.current_host_status == Status::Ok,
-            // Don't notify on startup or other transitions
+            // Don't notify on other transitions
             _ => false,
         }
     }
@@ -374,8 +376,8 @@ impl HostStatusManager {
             details.push('\n');
         }
 
-        // Show recoveries
-        if !recovery_changes.is_empty() {
+        // Show recoveries only if host status is now OK (all services recovered)
+        if !recovery_changes.is_empty() && aggregated.host_status_final == Status::Ok {
             details.push_str(&format!("✅ RECOVERIES ({}):\n", recovery_changes.len()));
             for change in recovery_changes {
                 details.push_str(&format!("  {}\n", change));
```
Cargo.toml (package `cm-dashboard`)

```diff
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard"
-version = "0.1.47"
+version = "0.1.64"
 edition = "2021"
 
 [dependencies]
@@ -18,4 +18,5 @@ tracing-subscriber = { workspace = true }
 ratatui = { workspace = true }
 crossterm = { workspace = true }
 toml = { workspace = true }
 gethostname = { workspace = true }
+wake-on-lan = "0.2"
```
```diff
@@ -22,7 +22,7 @@ pub struct Dashboard {
     terminal: Option<Terminal<CrosstermBackend<io::Stdout>>>,
     headless: bool,
     initial_commands_sent: std::collections::HashSet<String>,
-    _config: DashboardConfig,
+    config: DashboardConfig,
 }
 
 impl Dashboard {
@@ -67,8 +67,8 @@ impl Dashboard {
             }
         };
 
-        // Connect to predefined hosts from configuration
-        let hosts = config.hosts.predefined_hosts.clone();
+        // Connect to configured hosts from configuration
+        let hosts: Vec<String> = config.hosts.keys().cloned().collect();
 
         // Try to connect to hosts but don't fail if none are available
         match zmq_consumer.connect_to_predefined_hosts(&hosts).await {
@@ -133,7 +133,7 @@ impl Dashboard {
             terminal,
             headless,
             initial_commands_sent: std::collections::HashSet::new(),
-            _config: config,
+            config,
         })
     }
 
@@ -149,6 +149,8 @@ impl Dashboard {
 
         let mut last_metrics_check = Instant::now();
         let metrics_check_interval = Duration::from_millis(100); // Check for metrics every 100ms
+        let mut last_heartbeat_check = Instant::now();
+        let heartbeat_check_interval = Duration::from_secs(1); // Check for host connectivity every 1 second
 
         loop {
             // Handle terminal events (keyboard input) only if not headless
@@ -191,6 +193,17 @@ impl Dashboard {
                         break;
                     }
                 }
+
+                // Render UI immediately after handling input for responsive feedback
+                if let Some(ref mut terminal) = self.terminal {
+                    if let Some(ref mut tui_app) = self.tui_app {
+                        if let Err(e) = terminal.draw(|frame| {
+                            tui_app.render(frame, &self.metric_store);
+                        }) {
+                            error!("Error rendering TUI after input: {}", e);
+                        }
+                    }
+                }
             }
 
             // Check for new metrics
@@ -243,14 +256,8 @@ impl Dashboard {
                 }
             }
 
-            // Update TUI with new hosts and metrics (only if not headless)
+            // Update TUI with new metrics (only if not headless)
             if let Some(ref mut tui_app) = self.tui_app {
-                let connected_hosts = self
-                    .metric_store
-                    .get_connected_hosts(Duration::from_secs(30));
-
-
-                tui_app.update_hosts(connected_hosts);
                 tui_app.update_metrics(&self.metric_store);
             }
         }
@@ -269,6 +276,20 @@ impl Dashboard {
                 last_metrics_check = Instant::now();
             }
 
+            // Check for host connectivity changes (heartbeat timeouts) periodically
+            if last_heartbeat_check.elapsed() >= heartbeat_check_interval {
+                let timeout = Duration::from_secs(self.config.zmq.heartbeat_timeout_seconds);
+
+                // Clean up metrics for offline hosts
+                self.metric_store.cleanup_offline_hosts(timeout);
+
+                if let Some(ref mut tui_app) = self.tui_app {
+                    let connected_hosts = self.metric_store.get_connected_hosts(timeout);
+                    tui_app.update_hosts(connected_hosts);
+                }
+                last_heartbeat_check = Instant::now();
+            }
+
             // Render TUI (only if not headless)
             if !self.headless {
                 if let Some(ref mut terminal) = self.terminal {
```
```diff
@@ -6,21 +6,29 @@ use std::path::Path;
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct DashboardConfig {
     pub zmq: ZmqConfig,
-    pub hosts: HostsConfig,
+    pub hosts: std::collections::HashMap<String, HostDetails>,
     pub system: SystemConfig,
     pub ssh: SshConfig,
+    pub service_logs: std::collections::HashMap<String, Vec<ServiceLogConfig>>,
 }
 
 /// ZMQ consumer configuration
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ZmqConfig {
     pub subscriber_ports: Vec<u16>,
+    /// Heartbeat timeout in seconds - hosts considered offline if no heartbeat received within this time
+    #[serde(default = "default_heartbeat_timeout_seconds")]
+    pub heartbeat_timeout_seconds: u64,
 }
 
-/// Hosts configuration
+fn default_heartbeat_timeout_seconds() -> u64 {
+    10 // Default to 10 seconds - allows for multiple missed heartbeats
+}
+
+/// Individual host configuration details
 #[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct HostsConfig {
-    pub predefined_hosts: Vec<String>,
+pub struct HostDetails {
+    pub mac_address: Option<String>,
 }
 
 /// System configuration
@@ -39,6 +47,13 @@ pub struct SshConfig {
     pub rebuild_alias: String,
 }
 
+/// Service log file configuration per host
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ServiceLogConfig {
+    pub service_name: String,
+    pub log_file_path: String,
+}
+
 impl DashboardConfig {
     pub fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
         let path = path.as_ref();
@@ -60,8 +75,3 @@ impl Default for ZmqConfig {
     }
 }
 
-impl Default for HostsConfig {
-    fn default() -> Self {
-        panic!("Dashboard configuration must be loaded from file - no hardcoded defaults allowed")
-    }
-}
```
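Since `hosts` is now a map from host name to `HostDetails` and `service_logs` maps host names to log entries, the dashboard config file is expected to take the shape sketched below. The key names mirror `DashboardConfig`, `ZmqConfig`, `SshConfig`, `HostDetails` and `ServiceLogConfig` above; the host names, MAC address, port and paths are made-up placeholders, not values from the repository:

```toml
# Illustrative dashboard configuration fragment (placeholder values)
[zmq]
subscriber_ports = [5555]
# Optional: a host is treated as offline after this many seconds without a heartbeat (default 10)
heartbeat_timeout_seconds = 10

[ssh]
rebuild_user = "admin"
rebuild_alias = "rebuild"

[hosts.hostname1]
mac_address = "AA:BB:CC:DD:EE:FF"   # enables the 'w' wake-on-LAN shortcut

[hosts.hostname2]
# no mac_address: the offline view will report that this host cannot be woken

[service_logs]
hostname1 = [
  { service_name = "nginx", log_file_path = "/var/log/nginx/access.log" }
]
```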
```diff
@@ -11,8 +11,8 @@ pub struct MetricStore {
     current_metrics: HashMap<String, HashMap<String, Metric>>,
     /// Historical metrics for trending
     historical_metrics: HashMap<String, Vec<MetricDataPoint>>,
-    /// Last update timestamp per host
-    last_update: HashMap<String, Instant>,
+    /// Last heartbeat timestamp per host
+    last_heartbeat: HashMap<String, Instant>,
     /// Configuration
     max_metrics_per_host: usize,
     history_retention: Duration,
@@ -23,7 +23,7 @@ impl MetricStore {
         Self {
             current_metrics: HashMap::new(),
             historical_metrics: HashMap::new(),
-            last_update: HashMap::new(),
+            last_heartbeat: HashMap::new(),
             max_metrics_per_host,
             history_retention: Duration::from_secs(history_retention_hours * 3600),
         }
@@ -56,10 +56,13 @@ impl MetricStore {
 
             // Add to history
             host_history.push(MetricDataPoint { received_at: now });
-        }
 
-        // Update last update timestamp
-        self.last_update.insert(hostname.to_string(), now);
+            // Track heartbeat metrics for connectivity detection
+            if metric_name == "agent_heartbeat" {
+                self.last_heartbeat.insert(hostname.to_string(), now);
+                debug!("Updated heartbeat for host {}", hostname);
+            }
+        }
 
         // Get metrics count before cleanup
         let metrics_count = host_metrics.len();
@@ -88,22 +91,46 @@ impl MetricStore {
         }
     }
 
-    /// Get connected hosts (hosts with recent updates)
+    /// Get connected hosts (hosts with recent heartbeats)
     pub fn get_connected_hosts(&self, timeout: Duration) -> Vec<String> {
         let now = Instant::now();
 
-        self.last_update
+        self.last_heartbeat
             .iter()
-            .filter_map(|(hostname, &last_update)| {
-                if now.duration_since(last_update) <= timeout {
+            .filter_map(|(hostname, &last_heartbeat)| {
+                if now.duration_since(last_heartbeat) <= timeout {
                     Some(hostname.clone())
                 } else {
+                    debug!("Host {} considered offline - last heartbeat was {:?} ago",
+                        hostname, now.duration_since(last_heartbeat));
                     None
                 }
             })
             .collect()
     }
 
+    /// Clean up data for offline hosts
+    pub fn cleanup_offline_hosts(&mut self, timeout: Duration) {
+        let now = Instant::now();
+        let mut hosts_to_cleanup = Vec::new();
+
+        // Find hosts that are offline (no recent heartbeat)
+        for (hostname, &last_heartbeat) in &self.last_heartbeat {
+            if now.duration_since(last_heartbeat) > timeout {
+                hosts_to_cleanup.push(hostname.clone());
+            }
+        }
+
+        // Clear metrics for offline hosts
+        for hostname in hosts_to_cleanup {
+            if let Some(metrics) = self.current_metrics.remove(&hostname) {
+                info!("Cleared {} metrics for offline host: {}", metrics.len(), hostname);
+            }
+            // Keep heartbeat timestamp for reconnection detection
+            // Don't remove from last_heartbeat to track when host was last seen
+        }
+    }
+
     /// Cleanup old data and enforce limits
     fn cleanup_host_data(&mut self, hostname: &str) {
         let now = Instant::now();
```
```diff
@@ -9,6 +9,7 @@ use ratatui::{
 use std::collections::HashMap;
 use std::time::Instant;
 use tracing::info;
+use wake_on_lan::MagicPacket;
 
 pub mod theme;
 pub mod widgets;
@@ -89,19 +90,33 @@ pub struct TuiApp {
     user_navigated_away: bool,
     /// Dashboard configuration
     config: DashboardConfig,
+    /// Cached localhost hostname to avoid repeated system calls
+    localhost: String,
 }
 
 impl TuiApp {
     pub fn new(config: DashboardConfig) -> Self {
-        Self {
+        let localhost = gethostname::gethostname().to_string_lossy().to_string();
+        let mut app = Self {
             host_widgets: HashMap::new(),
             current_host: None,
-            available_hosts: Vec::new(),
+            available_hosts: config.hosts.keys().cloned().collect(),
             host_index: 0,
             should_quit: false,
             user_navigated_away: false,
             config,
+            localhost,
+        };
+
+        // Sort predefined hosts
+        app.available_hosts.sort();
+
+        // Initialize with first host if available
+        if !app.available_hosts.is_empty() {
+            app.current_host = Some(app.available_hosts[0].clone());
         }
+
+        app
     }
 
     /// Get or create host widgets for the given hostname
@@ -120,31 +135,31 @@ impl TuiApp {
         // Only update widgets if we have metrics for this host
         let all_metrics = metric_store.get_metrics_for_host(&hostname);
         if !all_metrics.is_empty() {
-            // Get metrics first while hostname is borrowed
-            let cpu_metrics: Vec<&Metric> = all_metrics
-                .iter()
-                .filter(|m| {
-                    m.name.starts_with("cpu_")
-                        || m.name.contains("c_state_")
-                        || m.name.starts_with("process_top_")
-                })
-                .copied()
-                .collect();
-            let memory_metrics: Vec<&Metric> = all_metrics
-                .iter()
-                .filter(|m| m.name.starts_with("memory_") || m.name.starts_with("disk_tmp_"))
-                .copied()
-                .collect();
-            let service_metrics: Vec<&Metric> = all_metrics
-                .iter()
-                .filter(|m| m.name.starts_with("service_"))
-                .copied()
-                .collect();
-            let all_backup_metrics: Vec<&Metric> = all_metrics
-                .iter()
-                .filter(|m| m.name.starts_with("backup_"))
-                .copied()
-                .collect();
+            // Single pass metric categorization for better performance
+            let mut cpu_metrics = Vec::new();
+            let mut memory_metrics = Vec::new();
+            let mut service_metrics = Vec::new();
+            let mut backup_metrics = Vec::new();
+            let mut nixos_metrics = Vec::new();
+            let mut disk_metrics = Vec::new();
+
+            for metric in all_metrics {
+                if metric.name.starts_with("cpu_")
+                    || metric.name.contains("c_state_")
+                    || metric.name.starts_with("process_top_") {
+                    cpu_metrics.push(metric);
+                } else if metric.name.starts_with("memory_") || metric.name.starts_with("disk_tmp_") {
+                    memory_metrics.push(metric);
+                } else if metric.name.starts_with("service_") {
+                    service_metrics.push(metric);
+                } else if metric.name.starts_with("backup_") {
+                    backup_metrics.push(metric);
+                } else if metric.name == "system_nixos_build" || metric.name == "system_active_users" || metric.name == "agent_version" {
+                    nixos_metrics.push(metric);
+                } else if metric.name.starts_with("disk_") {
+                    disk_metrics.push(metric);
+                }
+            }
 
             // Clear completed transitions first
             self.clear_completed_transitions(&hostname, &service_metrics);
@@ -155,21 +170,7 @@ impl TuiApp {
             // Collect all system metrics (CPU, memory, NixOS, disk/storage)
             let mut system_metrics = cpu_metrics;
             system_metrics.extend(memory_metrics);
-
-            // Add NixOS metrics - using exact matching for build display fix
-            let nixos_metrics: Vec<&Metric> = all_metrics
-                .iter()
-                .filter(|m| m.name == "system_nixos_build" || m.name == "system_active_users" || m.name == "agent_version")
-                .copied()
-                .collect();
             system_metrics.extend(nixos_metrics);
-
-            // Add disk/storage metrics
-            let disk_metrics: Vec<&Metric> = all_metrics
-                .iter()
-                .filter(|m| m.name.starts_with("disk_"))
-                .copied()
-                .collect();
             system_metrics.extend(disk_metrics);
 
             host_widgets.system_widget.update_from_metrics(&system_metrics);
@@ -178,7 +179,7 @@ impl TuiApp {
                 .update_from_metrics(&service_metrics);
             host_widgets
                 .backup_widget
-                .update_from_metrics(&all_backup_metrics);
+                .update_from_metrics(&backup_metrics);
 
             host_widgets.last_update = Some(Instant::now());
         }
@@ -186,30 +187,36 @@ impl TuiApp {
     }
 
     /// Update available hosts with localhost prioritization
-    pub fn update_hosts(&mut self, hosts: Vec<String>) {
-        // Sort hosts alphabetically
-        let mut sorted_hosts = hosts.clone();
+    pub fn update_hosts(&mut self, discovered_hosts: Vec<String>) {
+        // Start with configured hosts (always visible)
+        let mut all_hosts: Vec<String> = self.config.hosts.keys().cloned().collect();
 
+        // Add any discovered hosts that aren't already configured
+        for host in discovered_hosts {
+            if !all_hosts.contains(&host) {
+                all_hosts.push(host);
+            }
+        }
+
         // Keep hosts that have pending transitions even if they're offline
         for (hostname, host_widgets) in &self.host_widgets {
             if !host_widgets.pending_service_transitions.is_empty() {
-                if !sorted_hosts.contains(hostname) {
-                    sorted_hosts.push(hostname.clone());
+                if !all_hosts.contains(hostname) {
+                    all_hosts.push(hostname.clone());
                 }
             }
         }
 
-        sorted_hosts.sort();
-        self.available_hosts = sorted_hosts;
+        all_hosts.sort();
+        self.available_hosts = all_hosts;
 
         // Get the current hostname (localhost) for auto-selection
-        let localhost = gethostname::gethostname().to_string_lossy().to_string();
         if !self.available_hosts.is_empty() {
-            if self.available_hosts.contains(&localhost) && !self.user_navigated_away {
+            if self.available_hosts.contains(&self.localhost) && !self.user_navigated_away {
                 // Localhost is available and user hasn't navigated away - switch to it
-                self.current_host = Some(localhost.clone());
+                self.current_host = Some(self.localhost.clone());
                 // Find the actual index of localhost in the sorted list
-                self.host_index = self.available_hosts.iter().position(|h| h == &localhost).unwrap_or(0);
+                self.host_index = self.available_hosts.iter().position(|h| h == &self.localhost).unwrap_or(0);
             } else if self.current_host.is_none() {
                 // No current host - select first available (which is localhost if available)
                 self.current_host = Some(self.available_hosts[0].clone());
@@ -244,14 +251,9 @@ impl TuiApp {
             KeyCode::Char('r') => {
                 // System rebuild command - works on any panel for current host
                 if let Some(hostname) = self.current_host.clone() {
-                    // Create command that shows CM Dashboard logo and then rebuilds
+                    // Create command that shows logo, rebuilds, and waits for user input
                     let logo_and_rebuild = format!(
-                        r"cat << 'EOF'
-NixOS System Rebuild
-Target: {}
-
-EOF
-ssh -tt {}@{} 'bash -ic {}'",
+                        "bash -c 'cat << \"EOF\"\nNixOS System Rebuild\nTarget: {}\n\nEOF\nssh -tt {}@{} \"bash -ic {}\"\necho\necho \"========================================\"\necho \"Rebuild completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'",
                         hostname,
                         self.config.ssh.rebuild_user,
                         hostname,
@@ -259,11 +261,10 @@ ssh -tt {}@{} 'bash -ic {}'",
                     );
 
                     std::process::Command::new("tmux")
-                        .arg("display-popup")
-                        .arg("-w")
-                        .arg("80%")
-                        .arg("-h")
-                        .arg("80%")
+                        .arg("split-window")
+                        .arg("-v")
+                        .arg("-p")
+                        .arg("30")
                         .arg(&logo_and_rebuild)
                         .spawn()
                         .ok(); // Ignore errors, tmux will handle them
@@ -286,28 +287,50 @@ ssh -tt {}@{} 'bash -ic {}'",
                 }
             }
             KeyCode::Char('J') => {
-                // Show service logs via journalctl in tmux popup
+                // Show service logs via journalctl in tmux split window
                 if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
                     let journalctl_command = format!(
-                        "ssh -tt {}@{} 'journalctl -u {}.service -f --no-pager -n 50'",
+                        "bash -c \"ssh -tt {}@{} 'sudo journalctl -u {}.service -f --no-pager -n 50'; exit\"",
                         self.config.ssh.rebuild_user,
                         hostname,
                         service_name
                     );
 
                     std::process::Command::new("tmux")
-                        .arg("display-popup")
-                        .arg("-w")
-                        .arg("80%")
-                        .arg("-h")
-                        .arg("80%")
-                        .arg("-t")
-                        .arg(format!("Logs: {}", service_name))
+                        .arg("split-window")
+                        .arg("-v")
+                        .arg("-p")
+                        .arg("30")
                         .arg(&journalctl_command)
                         .spawn()
                         .ok(); // Ignore errors, tmux will handle them
                 }
             }
+            KeyCode::Char('L') => {
+                // Show custom service log file in tmux split window
+                if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
+                    // Check if this service has a custom log file configured
+                    if let Some(host_logs) = self.config.service_logs.get(&hostname) {
+                        if let Some(log_config) = host_logs.iter().find(|config| config.service_name == service_name) {
+                            let tail_command = format!(
+                                "bash -c \"ssh -tt {}@{} 'sudo tail -n 50 -f {}'; exit\"",
+                                self.config.ssh.rebuild_user,
+                                hostname,
+                                log_config.log_file_path
+                            );
+
+                            std::process::Command::new("tmux")
+                                .arg("split-window")
+                                .arg("-v")
+                                .arg("-p")
+                                .arg("30")
+                                .arg(&tail_command)
+                                .spawn()
+                                .ok(); // Ignore errors, tmux will handle them
+                        }
+                    }
+                }
+            }
             KeyCode::Char('b') => {
                 // Trigger backup
                 if let Some(hostname) = self.current_host.clone() {
@@ -315,6 +338,33 @@ ssh -tt {}@{} 'bash -ic {}'",
                     return Ok(Some(UiCommand::TriggerBackup { hostname }));
                 }
             }
+            KeyCode::Char('w') => {
+                // Wake on LAN for offline hosts
+                if let Some(hostname) = self.current_host.clone() {
+                    // Check if host has MAC address configured
+                    if let Some(host_details) = self.config.hosts.get(&hostname) {
+                        if let Some(mac_address) = &host_details.mac_address {
+                            // Parse MAC address and send WoL packet
+                            let mac_bytes = Self::parse_mac_address(mac_address);
+                            match mac_bytes {
+                                Ok(mac) => {
+                                    match MagicPacket::new(&mac).send() {
+                                        Ok(_) => {
+                                            info!("WakeOnLAN packet sent successfully to {} ({})", hostname, mac_address);
+                                        }
+                                        Err(e) => {
+                                            tracing::error!("Failed to send WakeOnLAN packet to {}: {}", hostname, e);
+                                        }
+                                    }
+                                }
+                                Err(_) => {
+                                    tracing::error!("Invalid MAC address format for {}: {}", hostname, mac_address);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
             KeyCode::Tab => {
                 // Tab cycles to next host
                 self.navigate_host(1);
@@ -363,9 +413,8 @@ ssh -tt {}@{} 'bash -ic {}'",
                 self.current_host = Some(self.available_hosts[self.host_index].clone());
 
                 // Check if user navigated away from localhost
-                let localhost = gethostname::gethostname().to_string_lossy().to_string();
                 if let Some(ref current) = self.current_host {
-                    if current != &localhost {
+                    if current != &self.localhost {
                         self.user_navigated_away = true;
                     } else {
                         self.user_navigated_away = false; // User navigated back to localhost
@@ -509,6 +558,21 @@ ssh -tt {}@{} 'bash -ic {}'",
             ])
             .split(main_chunks[1]); // main_chunks[1] is now the content area (between title and statusbar)
 
+        // Check if current host is offline
+        let current_host_offline = if let Some(hostname) = self.current_host.clone() {
+            self.calculate_host_status(&hostname, metric_store) == Status::Offline
+        } else {
+            true // No host selected is considered offline
+        };
+
+        // If host is offline, render wake-up message instead of panels
+        if current_host_offline {
+            self.render_offline_host_message(frame, main_chunks[1]);
+            self.render_btop_title(frame, main_chunks[0], metric_store);
+            self.render_statusbar(frame, main_chunks[2]);
+            return;
+        }
+
         // Check if backup panel should be shown
         let show_backup = if let Some(hostname) = self.current_host.clone() {
             let host_widgets = self.get_or_create_host_widgets(&hostname);
@@ -576,11 +640,14 @@ ssh -tt {}@{} 'bash -ic {}'",
             return;
         }
 
-        // Calculate worst-case status across all hosts
+        // Calculate worst-case status across all hosts (excluding offline)
         let mut worst_status = Status::Ok;
         for host in &self.available_hosts {
             let host_status = self.calculate_host_status(host, metric_store);
-            worst_status = Status::aggregate(&[worst_status, host_status]);
+            // Don't include offline hosts in status aggregation
+            if host_status != Status::Offline {
+                worst_status = Status::aggregate(&[worst_status, host_status]);
+            }
         }
 
         // Use the worst status color as background
@@ -658,7 +725,7 @@ ssh -tt {}@{} 'bash -ic {}'",
         let metrics = metric_store.get_metrics_for_host(hostname);
 
         if metrics.is_empty() {
-            return Status::Unknown;
+            return Status::Offline;
        }
 
         // First check if we have the aggregated host status summary from the agent
@@ -678,7 +745,8 @@ ssh -tt {}@{} 'bash -ic {}'",
                 Status::Warning => has_warning = true,
                 Status::Pending => has_pending = true,
                 Status::Ok => ok_count += 1,
-                Status::Unknown => {} // Ignore unknown for aggregation
+                Status::Unknown => {}, // Ignore unknown for aggregation
+                Status::Offline => {}, // Ignore offline for aggregation
             }
         }
 
@@ -718,6 +786,8 @@ ssh -tt {}@{} 'bash -ic {}'",
         shortcuts.push("r: Rebuild".to_string());
         shortcuts.push("s/S: Start/Stop".to_string());
         shortcuts.push("J: Logs".to_string());
+        shortcuts.push("L: Custom".to_string());
+        shortcuts.push("w: Wake".to_string());
 
         // Always show quit
         shortcuts.push("q: Quit".to_string());
@@ -756,5 +826,91 @@ ssh -tt {}@{} 'bash -ic {}'",
         }
     }
 
+    /// Render offline host message with wake-up option
+    fn render_offline_host_message(&self, frame: &mut Frame, area: Rect) {
+        use ratatui::layout::Alignment;
+        use ratatui::style::Modifier;
+        use ratatui::text::{Line, Span};
+        use ratatui::widgets::{Block, Borders, Paragraph};
+
+        // Get hostname for message
+        let hostname = self.current_host.as_ref()
+            .map(|h| h.as_str())
+            .unwrap_or("Unknown");
+
+        // Check if host has MAC address for wake-on-LAN
+        let has_mac = self.current_host.as_ref()
+            .and_then(|hostname| self.config.hosts.get(hostname))
+            .and_then(|details| details.mac_address.as_ref())
+            .is_some();
+
+        // Create message content
+        let mut lines = vec![
+            Line::from(Span::styled(
+                format!("Host '{}' is offline", hostname),
+                Style::default().fg(Theme::muted_text()).add_modifier(Modifier::BOLD),
+            )),
+            Line::from(""),
+        ];
+
+        if has_mac {
+            lines.push(Line::from(Span::styled(
+                "Press 'w' to wake up host",
+                Style::default().fg(Theme::primary_text()).add_modifier(Modifier::BOLD),
+            )));
+        } else {
+            lines.push(Line::from(Span::styled(
+                "No MAC address configured - cannot wake up",
+                Style::default().fg(Theme::muted_text()),
+            )));
+        }
+
+        // Create centered message
+        let message = Paragraph::new(lines)
+            .block(Block::default()
+                .borders(Borders::ALL)
+                .border_style(Style::default().fg(Theme::muted_text()))
+                .title(" Offline Host ")
+                .title_style(Style::default().fg(Theme::muted_text()).add_modifier(Modifier::BOLD)))
+            .style(Style::default().bg(Theme::background()).fg(Theme::primary_text()))
+            .alignment(Alignment::Center);
+
+        // Center the message in the available area
+        let popup_area = ratatui::layout::Layout::default()
+            .direction(Direction::Vertical)
+            .constraints([
+                Constraint::Percentage(40),
+                Constraint::Length(6),
+                Constraint::Percentage(40),
+            ])
+            .split(area)[1];
+
+        let popup_area = ratatui::layout::Layout::default()
+            .direction(Direction::Horizontal)
+            .constraints([
+                Constraint::Percentage(25),
+                Constraint::Percentage(50),
+                Constraint::Percentage(25),
+            ])
+            .split(popup_area)[1];
+
+        frame.render_widget(message, popup_area);
+    }
+
+    /// Parse MAC address string (e.g., "AA:BB:CC:DD:EE:FF") to [u8; 6]
+    fn parse_mac_address(mac_str: &str) -> Result<[u8; 6], &'static str> {
+        let parts: Vec<&str> = mac_str.split(':').collect();
+        if parts.len() != 6 {
+            return Err("MAC address must have 6 parts separated by colons");
+        }
+
+        let mut mac = [0u8; 6];
+        for (i, part) in parts.iter().enumerate() {
+            match u8::from_str_radix(part, 16) {
+                Ok(byte) => mac[i] = byte,
+                Err(_) => return Err("Invalid hexadecimal byte in MAC address"),
+            }
+        }
+        Ok(mac)
+    }
 }
```
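The wake-on-LAN path added above reduces to parsing the configured MAC string into six bytes and handing them to the `wake-on-lan` crate, exactly as the `'w'` key handler does. Below is a small standalone sketch of that flow; the `main` function and the hard-coded MAC are illustrative assumptions, while `MagicPacket::new(&mac).send()` is used the same way as in the handler:

```rust
use wake_on_lan::MagicPacket;

/// Same parsing rules as TuiApp::parse_mac_address above:
/// six colon-separated hexadecimal bytes, e.g. "AA:BB:CC:DD:EE:FF".
fn parse_mac_address(mac_str: &str) -> Result<[u8; 6], &'static str> {
    let parts: Vec<&str> = mac_str.split(':').collect();
    if parts.len() != 6 {
        return Err("MAC address must have 6 parts separated by colons");
    }
    let mut mac = [0u8; 6];
    for (i, part) in parts.iter().enumerate() {
        mac[i] = u8::from_str_radix(part, 16)
            .map_err(|_| "Invalid hexadecimal byte in MAC address")?;
    }
    Ok(mac)
}

fn main() {
    // Placeholder MAC; in the dashboard it comes from the host's mac_address config entry
    let mac = parse_mac_address("AA:BB:CC:DD:EE:FF").expect("valid MAC address");
    // Broadcast the magic packet on the local network, as the 'w' handler does
    match MagicPacket::new(&mac).send() {
        Ok(()) => println!("wake-on-LAN packet sent"),
        Err(e) => eprintln!("failed to send wake-on-LAN packet: {}", e),
    }
}
```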
```diff
@@ -147,6 +147,7 @@ impl Theme {
             Status::Warning => Self::warning(),
             Status::Critical => Self::error(),
             Status::Unknown => Self::muted_text(),
+            Status::Offline => Self::muted_text(), // Dark gray for offline
         }
     }
 
@@ -244,8 +245,9 @@ impl StatusIcons {
             Status::Ok => "●",
             Status::Pending => "◉", // Hollow circle for pending
             Status::Warning => "◐",
-            Status::Critical => "◯",
+            Status::Critical => "!",
             Status::Unknown => "?",
+            Status::Offline => "○", // Empty circle for offline
         }
     }
 
@@ -258,6 +260,7 @@ impl StatusIcons {
             Status::Warning => Theme::warning(), // Yellow
             Status::Critical => Theme::error(), // Red
             Status::Unknown => Theme::muted_text(), // Gray
+            Status::Offline => Theme::muted_text(), // Dark gray for offline
         };
 
         vec![
```
```diff
@@ -146,6 +146,7 @@ impl ServicesWidget {
             Status::Warning => Theme::warning(),
             Status::Critical => Theme::error(),
             Status::Unknown => Theme::muted_text(),
+            Status::Offline => Theme::muted_text(),
         };
 
         (icon.to_string(), info.status.clone(), status_color)
```
Cargo.toml (package `cm-dashboard-shared`)

```diff
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-shared"
-version = "0.1.47"
+version = "0.1.64"
 edition = "2021"
 
 [dependencies]
```
```diff
@@ -87,6 +87,7 @@ pub enum Status {
     Warning,
     Critical,
     Unknown,
+    Offline,
 }
 
 impl Status {
@@ -190,6 +191,16 @@ impl HysteresisThresholds {
                     Status::Ok
                 }
             }
+            Status::Offline => {
+                // Host coming back online, use normal thresholds like first measurement
+                if value >= self.critical_high {
+                    Status::Critical
+                } else if value >= self.warning_high {
+                    Status::Warning
+                } else {
+                    Status::Ok
+                }
+            }
         }
     }
 }
```