Compare commits
28 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 33e700529e | |||
| d644b7d40a | |||
| f635ba9c75 | |||
| 76b6e3373e | |||
| 0a13cab897 | |||
| d33ec5d225 | |||
| d31c2384df | |||
| c8db463204 | |||
| e8e50ef9bb | |||
| 0faed9309e | |||
| c980346d05 | |||
| 3e3d3f0c2b | |||
| 9eb7444d56 | |||
| 278d1763aa | |||
| f874264e13 | |||
| 5f6e47ece5 | |||
| 0e7cf24dbb | |||
| 2d080a2f51 | |||
| 6179bd51a7 | |||
| 57de4c366a | |||
| e18778e962 | |||
| e4469a0ebf | |||
| 6fedf4c7fc | |||
| 3f6dffa66e | |||
| 1b64fbde3d | |||
| 4f4c3b0d6e | |||
| bd20f0cae1 | |||
| 11c9a5f9d2 |
@@ -113,13 +113,13 @@ jobs:
|
||||
NIX_HASH="sha256-$(python3 -c "import base64, binascii; print(base64.b64encode(binascii.unhexlify('$NEW_HASH')).decode())")"
|
||||
|
||||
# Update the NixOS configuration
|
||||
sed -i "s|version = \"v[^\"]*\"|version = \"$VERSION\"|" hosts/common/cm-dashboard.nix
|
||||
sed -i "s|sha256 = \"sha256-[^\"]*\"|sha256 = \"$NIX_HASH\"|" hosts/common/cm-dashboard.nix
|
||||
sed -i "s|version = \"v[^\"]*\"|version = \"$VERSION\"|" hosts/services/cm-dashboard.nix
|
||||
sed -i "s|sha256 = \"sha256-[^\"]*\"|sha256 = \"$NIX_HASH\"|" hosts/services/cm-dashboard.nix
|
||||
|
||||
# Commit and push changes
|
||||
git config user.name "Gitea Actions"
|
||||
git config user.email "actions@gitea.cmtec.se"
|
||||
git add hosts/common/cm-dashboard.nix
|
||||
git add hosts/services/cm-dashboard.nix
|
||||
git commit -m "Auto-update cm-dashboard to $VERSION
|
||||
|
||||
- Update version to $VERSION with automated release
|
||||
|
||||
21
CLAUDE.md
21
CLAUDE.md
@@ -20,11 +20,28 @@ A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure.
|
||||
- Persistent storage survives agent restarts
|
||||
- Automatic flag clearing when services are restarted via dashboard
|
||||
|
||||
### Custom Service Logs
|
||||
- Configure service-specific log file paths per host in dashboard config
|
||||
- Press `L` on any service to view custom log files via `tail -f`
|
||||
- Configuration format in dashboard config:
|
||||
```toml
|
||||
[service_logs]
|
||||
hostname1 = [
|
||||
{ service_name = "nginx", log_file_path = "/var/log/nginx/access.log" },
|
||||
{ service_name = "app", log_file_path = "/var/log/myapp/app.log" }
|
||||
]
|
||||
hostname2 = [
|
||||
{ service_name = "database", log_file_path = "/var/log/postgres/postgres.log" }
|
||||
]
|
||||
```
|
||||
|
||||
### Service Management
|
||||
- **Direct Control**: Arrow keys (↑↓) or vim keys (j/k) navigate services
|
||||
- **Service Actions**:
|
||||
- `s` - Start service (sends UserStart command)
|
||||
- `S` - Stop service (sends UserStop command)
|
||||
- `J` - Show service logs (journalctl in tmux popup)
|
||||
- `L` - Show custom log files (tail -f custom paths in tmux popup)
|
||||
- `R` - Rebuild current host
|
||||
- **Visual Status**: Green ● (active), Yellow ◐ (inactive), Red ◯ (failed)
|
||||
- **Transitional Icons**: Blue arrows during operations
|
||||
@@ -32,6 +49,8 @@ A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure.
|
||||
### Navigation
|
||||
- **Tab**: Switch between hosts
|
||||
- **↑↓ or j/k**: Select services
|
||||
- **J**: Show service logs (journalctl)
|
||||
- **L**: Show custom log files
|
||||
- **q**: Quit dashboard
|
||||
|
||||
## Core Architecture Principles
|
||||
@@ -96,7 +115,7 @@ This automatically:
|
||||
- Uploads binaries via Gitea API
|
||||
|
||||
### NixOS Configuration Updates
|
||||
Edit `~/projects/nixosbox/hosts/common/cm-dashboard.nix`:
|
||||
Edit `~/projects/nixosbox/hosts/services/cm-dashboard.nix`:
|
||||
|
||||
```nix
|
||||
version = "v0.1.X";
|
||||
|
||||
13
Cargo.lock
generated
13
Cargo.lock
generated
@@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.44"
|
||||
version = "0.1.69"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@@ -286,12 +286,13 @@ dependencies = [
|
||||
"toml",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"wake-on-lan",
|
||||
"zmq",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.44"
|
||||
version = "0.1.69"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -314,7 +315,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.44"
|
||||
version = "0.1.69"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"serde",
|
||||
@@ -2064,6 +2065,12 @@ version = "0.9.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
||||
|
||||
[[package]]
|
||||
name = "wake-on-lan"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ccf60b60ad7e5b1b37372c5134cbcab4db0706c231d212e0c643a077462bc8f"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.5.0"
|
||||
|
||||
@@ -87,6 +87,7 @@ cm-dashboard • ● cmbox ● srv01 ● srv02 ● steambox
|
||||
- **↑↓ or j/k**: Navigate services
|
||||
- **s**: Start selected service (UserStart)
|
||||
- **S**: Stop selected service (UserStop)
|
||||
- **J**: Show service logs (journalctl in tmux popup)
|
||||
- **R**: Rebuild current host
|
||||
- **q**: Quit
|
||||
|
||||
@@ -328,7 +329,7 @@ This triggers automated:
|
||||
- Tarball upload to Gitea
|
||||
|
||||
### NixOS Integration
|
||||
Update `~/projects/nixosbox/hosts/common/cm-dashboard.nix`:
|
||||
Update `~/projects/nixosbox/hosts/services/cm-dashboard.nix`:
|
||||
|
||||
```nix
|
||||
version = "v0.1.43";
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.45"
|
||||
version = "0.1.71"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -78,10 +78,11 @@ impl Agent {
|
||||
info!("Initial metric collection completed - all data cached and ready");
|
||||
}
|
||||
|
||||
// Separate intervals for collection, transmission, and email notifications
|
||||
// Separate intervals for collection, transmission, heartbeat, and email notifications
|
||||
let mut collection_interval =
|
||||
interval(Duration::from_secs(self.config.collection_interval_seconds));
|
||||
let mut transmission_interval = interval(Duration::from_secs(self.config.zmq.transmission_interval_seconds));
|
||||
let mut heartbeat_interval = interval(Duration::from_secs(self.config.zmq.heartbeat_interval_seconds));
|
||||
let mut notification_interval = interval(Duration::from_secs(self.config.notifications.aggregation_interval_seconds));
|
||||
|
||||
loop {
|
||||
@@ -98,6 +99,12 @@ impl Agent {
|
||||
error!("Failed to broadcast metrics: {}", e);
|
||||
}
|
||||
}
|
||||
_ = heartbeat_interval.tick() => {
|
||||
// Send standalone heartbeat for host connectivity detection
|
||||
if let Err(e) = self.send_heartbeat().await {
|
||||
error!("Failed to send heartbeat: {}", e);
|
||||
}
|
||||
}
|
||||
_ = notification_interval.tick() => {
|
||||
// Process batched email notifications (separate from dashboard updates)
|
||||
if let Err(e) = self.host_status_manager.process_pending_notifications(&mut self.notification_manager).await {
|
||||
@@ -180,6 +187,13 @@ impl Agent {
|
||||
let version_metric = self.get_agent_version_metric();
|
||||
metrics.push(version_metric);
|
||||
|
||||
// Add heartbeat metric for host connectivity detection
|
||||
let heartbeat_metric = self.get_heartbeat_metric();
|
||||
metrics.push(heartbeat_metric);
|
||||
|
||||
// Check for user-stopped services that are now active and clear their flags
|
||||
self.clear_user_stopped_flags_for_active_services(&metrics);
|
||||
|
||||
if metrics.is_empty() {
|
||||
debug!("No metrics to broadcast");
|
||||
return Ok(());
|
||||
@@ -198,6 +212,12 @@ impl Agent {
|
||||
async fn process_metrics(&mut self, metrics: &[Metric]) -> bool {
|
||||
let mut status_changed = false;
|
||||
for metric in metrics {
|
||||
// Filter excluded metrics from email notification processing only
|
||||
if self.config.notifications.exclude_email_metrics.contains(&metric.name) {
|
||||
debug!("Excluding metric '{}' from email notification processing", metric.name);
|
||||
continue;
|
||||
}
|
||||
|
||||
if self.host_status_manager.process_metric(metric, &mut self.notification_manager).await {
|
||||
status_changed = true;
|
||||
}
|
||||
@@ -223,6 +243,35 @@ impl Agent {
|
||||
format!("v{}", env!("CARGO_PKG_VERSION"))
|
||||
}
|
||||
|
||||
/// Create heartbeat metric for host connectivity detection
|
||||
fn get_heartbeat_metric(&self) -> Metric {
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
let timestamp = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs();
|
||||
|
||||
Metric::new(
|
||||
"agent_heartbeat".to_string(),
|
||||
MetricValue::Integer(timestamp as i64),
|
||||
Status::Ok,
|
||||
)
|
||||
}
|
||||
|
||||
/// Send standalone heartbeat for connectivity detection
|
||||
async fn send_heartbeat(&mut self) -> Result<()> {
|
||||
let heartbeat_metric = self.get_heartbeat_metric();
|
||||
let message = MetricMessage::new(
|
||||
self.hostname.clone(),
|
||||
vec![heartbeat_metric],
|
||||
);
|
||||
|
||||
self.zmq_handler.publish_metrics(&message).await?;
|
||||
debug!("Sent standalone heartbeat for connectivity detection");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_commands(&mut self) -> Result<()> {
|
||||
// Try to receive commands (non-blocking)
|
||||
match self.zmq_handler.try_receive_command() {
|
||||
@@ -302,46 +351,71 @@ impl Agent {
|
||||
_ => {}
|
||||
}
|
||||
|
||||
let output = tokio::process::Command::new("sudo")
|
||||
.arg("systemctl")
|
||||
.arg(action_str)
|
||||
.arg(format!("{}.service", service_name))
|
||||
.output()
|
||||
.await?;
|
||||
// Spawn the systemctl command asynchronously to avoid blocking the agent
|
||||
let service_name_clone = service_name.to_string();
|
||||
let action_str_clone = action_str.to_string();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let result = tokio::process::Command::new("sudo")
|
||||
.arg("systemctl")
|
||||
.arg(&action_str_clone)
|
||||
.arg(format!("{}.service", service_name_clone))
|
||||
.output()
|
||||
.await;
|
||||
|
||||
if output.status.success() {
|
||||
info!("Service {} {} completed successfully", service_name, action_str);
|
||||
if !output.stdout.is_empty() {
|
||||
debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
|
||||
}
|
||||
|
||||
// Clear user-stopped flag AFTER successful start command
|
||||
if matches!(action, ServiceAction::UserStart) {
|
||||
info!("Clearing user-stopped flag for service '{}' after successful start", service_name);
|
||||
if let Err(e) = self.service_tracker.clear_user_stopped(service_name) {
|
||||
error!("Failed to clear user-stopped flag: {}", e);
|
||||
} else {
|
||||
// Sync to global tracker
|
||||
UserStoppedServiceTracker::update_global(&self.service_tracker);
|
||||
match result {
|
||||
Ok(output) => {
|
||||
if output.status.success() {
|
||||
info!("Service {} {} completed successfully", service_name_clone, action_str_clone);
|
||||
if !output.stdout.is_empty() {
|
||||
debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
|
||||
}
|
||||
} else {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
error!("Service {} {} failed: {}", service_name_clone, action_str_clone, stderr);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to execute systemctl {} {}: {}", action_str_clone, service_name_clone, e);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
error!("Service {} {} failed: {}", service_name, action_str, stderr);
|
||||
return Err(anyhow::anyhow!("systemctl {} {} failed: {}", action_str, service_name, stderr));
|
||||
}
|
||||
});
|
||||
|
||||
// Force refresh metrics after service control to update service status
|
||||
if matches!(action, ServiceAction::Start | ServiceAction::Stop | ServiceAction::UserStart | ServiceAction::UserStop) {
|
||||
info!("Triggering immediate metric refresh after service control");
|
||||
if let Err(e) = self.collect_metrics_only().await {
|
||||
error!("Failed to refresh metrics after service control: {}", e);
|
||||
} else {
|
||||
info!("Service status refreshed immediately after {} {}", action_str, service_name);
|
||||
}
|
||||
}
|
||||
info!("Service {} {} command initiated (non-blocking)", service_name, action_str);
|
||||
|
||||
// Note: Service status will be updated by the normal metric collection cycle
|
||||
// once the systemctl operation completes
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check metrics for user-stopped services that are now active and clear their flags
|
||||
fn clear_user_stopped_flags_for_active_services(&mut self, metrics: &[Metric]) {
|
||||
for metric in metrics {
|
||||
// Look for service status metrics that are active
|
||||
if metric.name.starts_with("service_") && metric.name.ends_with("_status") {
|
||||
if let MetricValue::String(status) = &metric.value {
|
||||
if status == "active" {
|
||||
// Extract service name from metric name (service_nginx_status -> nginx)
|
||||
let service_name = metric.name
|
||||
.strip_prefix("service_")
|
||||
.and_then(|s| s.strip_suffix("_status"))
|
||||
.unwrap_or("");
|
||||
|
||||
if !service_name.is_empty() && UserStoppedServiceTracker::is_service_user_stopped(service_name) {
|
||||
info!("Service '{}' is now active - clearing user-stopped flag", service_name);
|
||||
if let Err(e) = self.service_tracker.clear_user_stopped(service_name) {
|
||||
error!("Failed to clear user-stopped flag for '{}': {}", service_name, e);
|
||||
} else {
|
||||
// Sync to global tracker
|
||||
UserStoppedServiceTracker::update_global(&self.service_tracker);
|
||||
debug!("Cleared user-stopped flag for service '{}'", service_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -140,6 +140,7 @@ impl Collector for BackupCollector {
|
||||
Status::Warning => "warning".to_string(),
|
||||
Status::Critical => "critical".to_string(),
|
||||
Status::Unknown => "unknown".to_string(),
|
||||
Status::Offline => "offline".to_string(),
|
||||
}),
|
||||
status: overall_status,
|
||||
timestamp,
|
||||
@@ -202,6 +203,7 @@ impl Collector for BackupCollector {
|
||||
Status::Warning => "warning".to_string(),
|
||||
Status::Critical => "critical".to_string(),
|
||||
Status::Unknown => "unknown".to_string(),
|
||||
Status::Offline => "offline".to_string(),
|
||||
}),
|
||||
status: service_status,
|
||||
timestamp,
|
||||
|
||||
@@ -357,7 +357,15 @@ impl SystemdCollector {
|
||||
/// Calculate service status, taking user-stopped services into account
|
||||
fn calculate_service_status(&self, service_name: &str, active_status: &str) -> Status {
|
||||
match active_status.to_lowercase().as_str() {
|
||||
"active" => Status::Ok,
|
||||
"active" => {
|
||||
// If service is now active and was marked as user-stopped, clear the flag
|
||||
if UserStoppedServiceTracker::is_service_user_stopped(service_name) {
|
||||
debug!("Service '{}' is now active - clearing user-stopped flag", service_name);
|
||||
// Note: We can't directly clear here because this is a read-only context
|
||||
// The agent will need to handle this differently
|
||||
}
|
||||
Status::Ok
|
||||
},
|
||||
"inactive" | "dead" => {
|
||||
// Check if this service was stopped by user action
|
||||
if UserStoppedServiceTracker::is_service_user_stopped(service_name) {
|
||||
@@ -368,7 +376,15 @@ impl SystemdCollector {
|
||||
}
|
||||
},
|
||||
"failed" | "error" => Status::Critical,
|
||||
"activating" | "deactivating" | "reloading" | "start" | "stop" | "restart" => Status::Pending,
|
||||
"activating" | "deactivating" | "reloading" | "start" | "stop" | "restart" => {
|
||||
// For user-stopped services that are transitioning, keep them as OK during transition
|
||||
if UserStoppedServiceTracker::is_service_user_stopped(service_name) {
|
||||
debug!("Service '{}' is transitioning but was user-stopped - treating as OK", service_name);
|
||||
Status::Ok
|
||||
} else {
|
||||
Status::Pending
|
||||
}
|
||||
},
|
||||
_ => Status::Unknown,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,8 +66,6 @@ impl ZmqHandler {
|
||||
}
|
||||
|
||||
|
||||
/// Send heartbeat (placeholder for future use)
|
||||
|
||||
/// Try to receive a command (non-blocking)
|
||||
pub fn try_receive_command(&self) -> Result<Option<AgentCommand>> {
|
||||
match self.command_receiver.recv_bytes(zmq::DONTWAIT) {
|
||||
|
||||
@@ -25,9 +25,10 @@ pub struct ZmqConfig {
|
||||
pub publisher_port: u16,
|
||||
pub command_port: u16,
|
||||
pub bind_address: String,
|
||||
pub timeout_ms: u64,
|
||||
pub heartbeat_interval_ms: u64,
|
||||
pub transmission_interval_seconds: u64,
|
||||
/// Heartbeat transmission interval in seconds for host connectivity detection
|
||||
#[serde(default = "default_heartbeat_interval_seconds")]
|
||||
pub heartbeat_interval_seconds: u64,
|
||||
}
|
||||
|
||||
/// Collector configuration
|
||||
@@ -146,9 +147,23 @@ pub struct NotificationConfig {
|
||||
pub rate_limit_minutes: u64,
|
||||
/// Email notification batching interval in seconds (default: 60)
|
||||
pub aggregation_interval_seconds: u64,
|
||||
/// List of metric names to exclude from email notifications
|
||||
#[serde(default)]
|
||||
pub exclude_email_metrics: Vec<String>,
|
||||
/// Path to maintenance mode file that suppresses email notifications when present
|
||||
#[serde(default = "default_maintenance_mode_file")]
|
||||
pub maintenance_mode_file: String,
|
||||
}
|
||||
|
||||
|
||||
fn default_heartbeat_interval_seconds() -> u64 {
|
||||
5
|
||||
}
|
||||
|
||||
fn default_maintenance_mode_file() -> String {
|
||||
"/tmp/cm-maintenance".to_string()
|
||||
}
|
||||
|
||||
impl AgentConfig {
|
||||
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
||||
loader::load_config(path)
|
||||
|
||||
@@ -19,10 +19,6 @@ pub fn validate_config(config: &AgentConfig) -> Result<()> {
|
||||
bail!("ZMQ bind address cannot be empty");
|
||||
}
|
||||
|
||||
if config.zmq.timeout_ms == 0 {
|
||||
bail!("ZMQ timeout cannot be 0");
|
||||
}
|
||||
|
||||
// Validate collection interval
|
||||
if config.collection_interval_seconds == 0 {
|
||||
bail!("Collection interval cannot be 0");
|
||||
|
||||
@@ -59,6 +59,6 @@ impl NotificationManager {
|
||||
}
|
||||
|
||||
fn is_maintenance_mode(&self) -> bool {
|
||||
std::fs::metadata("/tmp/cm-maintenance").is_ok()
|
||||
std::fs::metadata(&self.config.maintenance_mode_file).is_ok()
|
||||
}
|
||||
}
|
||||
@@ -272,11 +272,13 @@ impl HostStatusManager {
|
||||
/// Check if a status change is significant enough for notification
|
||||
fn is_significant_change(&self, old_status: Status, new_status: Status) -> bool {
|
||||
match (old_status, new_status) {
|
||||
// Always notify on problems
|
||||
// Don't notify on transitions from Unknown (startup/restart scenario)
|
||||
(Status::Unknown, _) => false,
|
||||
// Always notify on problems (but not from Unknown)
|
||||
(_, Status::Warning) | (_, Status::Critical) => true,
|
||||
// Only notify on recovery if it's from a problem state to OK and all services are OK
|
||||
(Status::Warning | Status::Critical, Status::Ok) => self.current_host_status == Status::Ok,
|
||||
// Don't notify on startup or other transitions
|
||||
// Don't notify on other transitions
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
@@ -374,8 +376,8 @@ impl HostStatusManager {
|
||||
details.push('\n');
|
||||
}
|
||||
|
||||
// Show recoveries
|
||||
if !recovery_changes.is_empty() {
|
||||
// Show recoveries only if host status is now OK (all services recovered)
|
||||
if !recovery_changes.is_empty() && aggregated.host_status_final == Status::Ok {
|
||||
details.push_str(&format!("✅ RECOVERIES ({}):\n", recovery_changes.len()));
|
||||
for change in recovery_changes {
|
||||
details.push_str(&format!(" {}\n", change));
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.45"
|
||||
version = "0.1.71"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
@@ -18,4 +18,5 @@ tracing-subscriber = { workspace = true }
|
||||
ratatui = { workspace = true }
|
||||
crossterm = { workspace = true }
|
||||
toml = { workspace = true }
|
||||
gethostname = { workspace = true }
|
||||
gethostname = { workspace = true }
|
||||
wake-on-lan = "0.2"
|
||||
@@ -22,7 +22,7 @@ pub struct Dashboard {
|
||||
terminal: Option<Terminal<CrosstermBackend<io::Stdout>>>,
|
||||
headless: bool,
|
||||
initial_commands_sent: std::collections::HashSet<String>,
|
||||
_config: DashboardConfig,
|
||||
config: DashboardConfig,
|
||||
}
|
||||
|
||||
impl Dashboard {
|
||||
@@ -67,11 +67,8 @@ impl Dashboard {
|
||||
}
|
||||
};
|
||||
|
||||
// Connect to predefined hosts from configuration
|
||||
let hosts = config.hosts.predefined_hosts.clone();
|
||||
|
||||
// Try to connect to hosts but don't fail if none are available
|
||||
match zmq_consumer.connect_to_predefined_hosts(&hosts).await {
|
||||
match zmq_consumer.connect_to_predefined_hosts(&config.hosts).await {
|
||||
Ok(_) => info!("Successfully connected to ZMQ hosts"),
|
||||
Err(e) => {
|
||||
warn!(
|
||||
@@ -133,7 +130,7 @@ impl Dashboard {
|
||||
terminal,
|
||||
headless,
|
||||
initial_commands_sent: std::collections::HashSet::new(),
|
||||
_config: config,
|
||||
config,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -149,6 +146,8 @@ impl Dashboard {
|
||||
|
||||
let mut last_metrics_check = Instant::now();
|
||||
let metrics_check_interval = Duration::from_millis(100); // Check for metrics every 100ms
|
||||
let mut last_heartbeat_check = Instant::now();
|
||||
let heartbeat_check_interval = Duration::from_secs(1); // Check for host connectivity every 1 second
|
||||
|
||||
loop {
|
||||
// Handle terminal events (keyboard input) only if not headless
|
||||
@@ -191,6 +190,17 @@ impl Dashboard {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Render UI immediately after handling input for responsive feedback
|
||||
if let Some(ref mut terminal) = self.terminal {
|
||||
if let Some(ref mut tui_app) = self.tui_app {
|
||||
if let Err(e) = terminal.draw(|frame| {
|
||||
tui_app.render(frame, &self.metric_store);
|
||||
}) {
|
||||
error!("Error rendering TUI after input: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for new metrics
|
||||
@@ -243,14 +253,8 @@ impl Dashboard {
|
||||
}
|
||||
}
|
||||
|
||||
// Update TUI with new hosts and metrics (only if not headless)
|
||||
// Update TUI with new metrics (only if not headless)
|
||||
if let Some(ref mut tui_app) = self.tui_app {
|
||||
let connected_hosts = self
|
||||
.metric_store
|
||||
.get_connected_hosts(Duration::from_secs(30));
|
||||
|
||||
|
||||
tui_app.update_hosts(connected_hosts);
|
||||
tui_app.update_metrics(&self.metric_store);
|
||||
}
|
||||
}
|
||||
@@ -269,6 +273,20 @@ impl Dashboard {
|
||||
last_metrics_check = Instant::now();
|
||||
}
|
||||
|
||||
// Check for host connectivity changes (heartbeat timeouts) periodically
|
||||
if last_heartbeat_check.elapsed() >= heartbeat_check_interval {
|
||||
let timeout = Duration::from_secs(self.config.zmq.heartbeat_timeout_seconds);
|
||||
|
||||
// Clean up metrics for offline hosts
|
||||
self.metric_store.cleanup_offline_hosts(timeout);
|
||||
|
||||
if let Some(ref mut tui_app) = self.tui_app {
|
||||
let connected_hosts = self.metric_store.get_connected_hosts(timeout);
|
||||
tui_app.update_hosts(connected_hosts);
|
||||
}
|
||||
last_heartbeat_check = Instant::now();
|
||||
}
|
||||
|
||||
// Render TUI (only if not headless)
|
||||
if !self.headless {
|
||||
if let Some(ref mut terminal) = self.terminal {
|
||||
|
||||
@@ -71,6 +71,12 @@ impl ZmqConsumer {
|
||||
pub async fn connect_to_host(&mut self, hostname: &str, port: u16) -> Result<()> {
|
||||
let address = format!("tcp://{}:{}", hostname, port);
|
||||
|
||||
// First test basic TCP connectivity to the port
|
||||
if let Err(e) = self.test_tcp_connectivity(hostname, port).await {
|
||||
error!("TCP connectivity test failed for {}: {}", address, e);
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
match self.subscriber.connect(&address) {
|
||||
Ok(()) => {
|
||||
info!("Connected to agent at {}", address);
|
||||
@@ -84,13 +90,33 @@ impl ZmqConsumer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Connect to predefined hosts
|
||||
pub async fn connect_to_predefined_hosts(&mut self, hosts: &[String]) -> Result<()> {
|
||||
/// Test TCP connectivity to a host and port with timeout
|
||||
async fn test_tcp_connectivity(&self, hostname: &str, port: u16) -> Result<()> {
|
||||
let timeout = std::time::Duration::from_secs(3);
|
||||
|
||||
match tokio::time::timeout(timeout, tokio::net::TcpStream::connect((hostname, port))).await {
|
||||
Ok(Ok(_stream)) => {
|
||||
debug!("TCP connectivity test passed for {}:{}", hostname, port);
|
||||
Ok(())
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
debug!("TCP connectivity test failed for {}:{}: {}", hostname, port, e);
|
||||
Err(anyhow::anyhow!("TCP connection failed: {}", e))
|
||||
}
|
||||
Err(_) => {
|
||||
debug!("TCP connectivity test timed out for {}:{}", hostname, port);
|
||||
Err(anyhow::anyhow!("TCP connection timed out"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Connect to predefined hosts using their configuration
|
||||
pub async fn connect_to_predefined_hosts(&mut self, hosts: &std::collections::HashMap<String, crate::config::HostDetails>) -> Result<()> {
|
||||
let default_port = self.config.subscriber_ports[0];
|
||||
|
||||
for hostname in hosts {
|
||||
// Try to connect, but don't fail if some hosts are unreachable
|
||||
if let Err(e) = self.connect_to_host(hostname, default_port).await {
|
||||
for (hostname, host_details) in hosts {
|
||||
// Try to connect using configured IP, but don't fail if some hosts are unreachable
|
||||
if let Err(e) = self.connect_to_host_with_details(hostname, host_details, default_port).await {
|
||||
warn!("Could not connect to {}: {}", hostname, e);
|
||||
}
|
||||
}
|
||||
@@ -104,6 +130,15 @@ impl ZmqConsumer {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Connect to a host using its configuration details
|
||||
pub async fn connect_to_host_with_details(&mut self, hostname: &str, host_details: &crate::config::HostDetails, port: u16) -> Result<()> {
|
||||
// Get primary connection IP only - no fallbacks
|
||||
let primary_ip = host_details.get_connection_ip(hostname);
|
||||
|
||||
// Connect directly without fallback attempts
|
||||
self.connect_to_host(&primary_ip, port).await
|
||||
}
|
||||
|
||||
/// Receive command output from any connected agent (non-blocking)
|
||||
pub async fn receive_command_output(&mut self) -> Result<Option<CommandOutputMessage>> {
|
||||
match self.subscriber.recv_bytes(zmq::DONTWAIT) {
|
||||
|
||||
@@ -6,21 +6,40 @@ use std::path::Path;
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DashboardConfig {
|
||||
pub zmq: ZmqConfig,
|
||||
pub hosts: HostsConfig,
|
||||
pub hosts: std::collections::HashMap<String, HostDetails>,
|
||||
pub system: SystemConfig,
|
||||
pub ssh: SshConfig,
|
||||
pub service_logs: std::collections::HashMap<String, Vec<ServiceLogConfig>>,
|
||||
}
|
||||
|
||||
/// ZMQ consumer configuration
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ZmqConfig {
|
||||
pub subscriber_ports: Vec<u16>,
|
||||
/// Heartbeat timeout in seconds - hosts considered offline if no heartbeat received within this time
|
||||
#[serde(default = "default_heartbeat_timeout_seconds")]
|
||||
pub heartbeat_timeout_seconds: u64,
|
||||
}
|
||||
|
||||
/// Hosts configuration
|
||||
fn default_heartbeat_timeout_seconds() -> u64 {
|
||||
10 // Default to 10 seconds - allows for multiple missed heartbeats
|
||||
}
|
||||
|
||||
/// Individual host configuration details
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct HostsConfig {
|
||||
pub predefined_hosts: Vec<String>,
|
||||
pub struct HostDetails {
|
||||
pub mac_address: Option<String>,
|
||||
/// Primary IP address (local network)
|
||||
pub ip: Option<String>,
|
||||
}
|
||||
|
||||
|
||||
impl HostDetails {
|
||||
/// Get the IP address for connection (uses ip field or hostname as fallback)
|
||||
pub fn get_connection_ip(&self, hostname: &str) -> String {
|
||||
self.ip.as_ref().unwrap_or(&hostname.to_string()).clone()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// System configuration
|
||||
@@ -39,6 +58,13 @@ pub struct SshConfig {
|
||||
pub rebuild_alias: String,
|
||||
}
|
||||
|
||||
/// Service log file configuration per host
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ServiceLogConfig {
|
||||
pub service_name: String,
|
||||
pub log_file_path: String,
|
||||
}
|
||||
|
||||
impl DashboardConfig {
|
||||
pub fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
||||
let path = path.as_ref();
|
||||
@@ -60,8 +86,3 @@ impl Default for ZmqConfig {
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for HostsConfig {
|
||||
fn default() -> Self {
|
||||
panic!("Dashboard configuration must be loaded from file - no hardcoded defaults allowed")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,8 +11,8 @@ pub struct MetricStore {
|
||||
current_metrics: HashMap<String, HashMap<String, Metric>>,
|
||||
/// Historical metrics for trending
|
||||
historical_metrics: HashMap<String, Vec<MetricDataPoint>>,
|
||||
/// Last update timestamp per host
|
||||
last_update: HashMap<String, Instant>,
|
||||
/// Last heartbeat timestamp per host
|
||||
last_heartbeat: HashMap<String, Instant>,
|
||||
/// Configuration
|
||||
max_metrics_per_host: usize,
|
||||
history_retention: Duration,
|
||||
@@ -23,7 +23,7 @@ impl MetricStore {
|
||||
Self {
|
||||
current_metrics: HashMap::new(),
|
||||
historical_metrics: HashMap::new(),
|
||||
last_update: HashMap::new(),
|
||||
last_heartbeat: HashMap::new(),
|
||||
max_metrics_per_host,
|
||||
history_retention: Duration::from_secs(history_retention_hours * 3600),
|
||||
}
|
||||
@@ -56,10 +56,13 @@ impl MetricStore {
|
||||
|
||||
// Add to history
|
||||
host_history.push(MetricDataPoint { received_at: now });
|
||||
}
|
||||
|
||||
// Update last update timestamp
|
||||
self.last_update.insert(hostname.to_string(), now);
|
||||
// Track heartbeat metrics for connectivity detection
|
||||
if metric_name == "agent_heartbeat" {
|
||||
self.last_heartbeat.insert(hostname.to_string(), now);
|
||||
debug!("Updated heartbeat for host {}", hostname);
|
||||
}
|
||||
}
|
||||
|
||||
// Get metrics count before cleanup
|
||||
let metrics_count = host_metrics.len();
|
||||
@@ -88,22 +91,46 @@ impl MetricStore {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get connected hosts (hosts with recent updates)
|
||||
/// Get connected hosts (hosts with recent heartbeats)
|
||||
pub fn get_connected_hosts(&self, timeout: Duration) -> Vec<String> {
|
||||
let now = Instant::now();
|
||||
|
||||
self.last_update
|
||||
self.last_heartbeat
|
||||
.iter()
|
||||
.filter_map(|(hostname, &last_update)| {
|
||||
if now.duration_since(last_update) <= timeout {
|
||||
.filter_map(|(hostname, &last_heartbeat)| {
|
||||
if now.duration_since(last_heartbeat) <= timeout {
|
||||
Some(hostname.clone())
|
||||
} else {
|
||||
debug!("Host {} considered offline - last heartbeat was {:?} ago",
|
||||
hostname, now.duration_since(last_heartbeat));
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Clean up data for offline hosts
|
||||
pub fn cleanup_offline_hosts(&mut self, timeout: Duration) {
|
||||
let now = Instant::now();
|
||||
let mut hosts_to_cleanup = Vec::new();
|
||||
|
||||
// Find hosts that are offline (no recent heartbeat)
|
||||
for (hostname, &last_heartbeat) in &self.last_heartbeat {
|
||||
if now.duration_since(last_heartbeat) > timeout {
|
||||
hosts_to_cleanup.push(hostname.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Clear metrics for offline hosts
|
||||
for hostname in hosts_to_cleanup {
|
||||
if let Some(metrics) = self.current_metrics.remove(&hostname) {
|
||||
info!("Cleared {} metrics for offline host: {}", metrics.len(), hostname);
|
||||
}
|
||||
// Keep heartbeat timestamp for reconnection detection
|
||||
// Don't remove from last_heartbeat to track when host was last seen
|
||||
}
|
||||
}
|
||||
|
||||
/// Cleanup old data and enforce limits
|
||||
fn cleanup_host_data(&mut self, hostname: &str) {
|
||||
let now = Instant::now();
|
||||
|
||||
@@ -9,6 +9,7 @@ use ratatui::{
|
||||
use std::collections::HashMap;
|
||||
use std::time::Instant;
|
||||
use tracing::info;
|
||||
use wake_on_lan::MagicPacket;
|
||||
|
||||
pub mod theme;
|
||||
pub mod widgets;
|
||||
@@ -89,19 +90,33 @@ pub struct TuiApp {
|
||||
user_navigated_away: bool,
|
||||
/// Dashboard configuration
|
||||
config: DashboardConfig,
|
||||
/// Cached localhost hostname to avoid repeated system calls
|
||||
localhost: String,
|
||||
}
|
||||
|
||||
impl TuiApp {
|
||||
pub fn new(config: DashboardConfig) -> Self {
|
||||
Self {
|
||||
let localhost = gethostname::gethostname().to_string_lossy().to_string();
|
||||
let mut app = Self {
|
||||
host_widgets: HashMap::new(),
|
||||
current_host: None,
|
||||
available_hosts: Vec::new(),
|
||||
available_hosts: config.hosts.keys().cloned().collect(),
|
||||
host_index: 0,
|
||||
should_quit: false,
|
||||
user_navigated_away: false,
|
||||
config,
|
||||
localhost,
|
||||
};
|
||||
|
||||
// Sort predefined hosts
|
||||
app.available_hosts.sort();
|
||||
|
||||
// Initialize with first host if available
|
||||
if !app.available_hosts.is_empty() {
|
||||
app.current_host = Some(app.available_hosts[0].clone());
|
||||
}
|
||||
|
||||
app
|
||||
}
|
||||
|
||||
/// Get or create host widgets for the given hostname
|
||||
@@ -120,31 +135,31 @@ impl TuiApp {
|
||||
// Only update widgets if we have metrics for this host
|
||||
let all_metrics = metric_store.get_metrics_for_host(&hostname);
|
||||
if !all_metrics.is_empty() {
|
||||
// Get metrics first while hostname is borrowed
|
||||
let cpu_metrics: Vec<&Metric> = all_metrics
|
||||
.iter()
|
||||
.filter(|m| {
|
||||
m.name.starts_with("cpu_")
|
||||
|| m.name.contains("c_state_")
|
||||
|| m.name.starts_with("process_top_")
|
||||
})
|
||||
.copied()
|
||||
.collect();
|
||||
let memory_metrics: Vec<&Metric> = all_metrics
|
||||
.iter()
|
||||
.filter(|m| m.name.starts_with("memory_") || m.name.starts_with("disk_tmp_"))
|
||||
.copied()
|
||||
.collect();
|
||||
let service_metrics: Vec<&Metric> = all_metrics
|
||||
.iter()
|
||||
.filter(|m| m.name.starts_with("service_"))
|
||||
.copied()
|
||||
.collect();
|
||||
let all_backup_metrics: Vec<&Metric> = all_metrics
|
||||
.iter()
|
||||
.filter(|m| m.name.starts_with("backup_"))
|
||||
.copied()
|
||||
.collect();
|
||||
// Single pass metric categorization for better performance
|
||||
let mut cpu_metrics = Vec::new();
|
||||
let mut memory_metrics = Vec::new();
|
||||
let mut service_metrics = Vec::new();
|
||||
let mut backup_metrics = Vec::new();
|
||||
let mut nixos_metrics = Vec::new();
|
||||
let mut disk_metrics = Vec::new();
|
||||
|
||||
for metric in all_metrics {
|
||||
if metric.name.starts_with("cpu_")
|
||||
|| metric.name.contains("c_state_")
|
||||
|| metric.name.starts_with("process_top_") {
|
||||
cpu_metrics.push(metric);
|
||||
} else if metric.name.starts_with("memory_") || metric.name.starts_with("disk_tmp_") {
|
||||
memory_metrics.push(metric);
|
||||
} else if metric.name.starts_with("service_") {
|
||||
service_metrics.push(metric);
|
||||
} else if metric.name.starts_with("backup_") {
|
||||
backup_metrics.push(metric);
|
||||
} else if metric.name == "system_nixos_build" || metric.name == "system_active_users" || metric.name == "agent_version" {
|
||||
nixos_metrics.push(metric);
|
||||
} else if metric.name.starts_with("disk_") {
|
||||
disk_metrics.push(metric);
|
||||
}
|
||||
}
|
||||
|
||||
// Clear completed transitions first
|
||||
self.clear_completed_transitions(&hostname, &service_metrics);
|
||||
@@ -155,21 +170,7 @@ impl TuiApp {
|
||||
// Collect all system metrics (CPU, memory, NixOS, disk/storage)
|
||||
let mut system_metrics = cpu_metrics;
|
||||
system_metrics.extend(memory_metrics);
|
||||
|
||||
// Add NixOS metrics - using exact matching for build display fix
|
||||
let nixos_metrics: Vec<&Metric> = all_metrics
|
||||
.iter()
|
||||
.filter(|m| m.name == "system_nixos_build" || m.name == "system_active_users" || m.name == "agent_version")
|
||||
.copied()
|
||||
.collect();
|
||||
system_metrics.extend(nixos_metrics);
|
||||
|
||||
// Add disk/storage metrics
|
||||
let disk_metrics: Vec<&Metric> = all_metrics
|
||||
.iter()
|
||||
.filter(|m| m.name.starts_with("disk_"))
|
||||
.copied()
|
||||
.collect();
|
||||
system_metrics.extend(disk_metrics);
|
||||
|
||||
host_widgets.system_widget.update_from_metrics(&system_metrics);
|
||||
@@ -178,7 +179,7 @@ impl TuiApp {
|
||||
.update_from_metrics(&service_metrics);
|
||||
host_widgets
|
||||
.backup_widget
|
||||
.update_from_metrics(&all_backup_metrics);
|
||||
.update_from_metrics(&backup_metrics);
|
||||
|
||||
host_widgets.last_update = Some(Instant::now());
|
||||
}
|
||||
@@ -186,30 +187,36 @@ impl TuiApp {
|
||||
}
|
||||
|
||||
/// Update available hosts with localhost prioritization
|
||||
pub fn update_hosts(&mut self, hosts: Vec<String>) {
|
||||
// Sort hosts alphabetically
|
||||
let mut sorted_hosts = hosts.clone();
|
||||
pub fn update_hosts(&mut self, discovered_hosts: Vec<String>) {
|
||||
// Start with configured hosts (always visible)
|
||||
let mut all_hosts: Vec<String> = self.config.hosts.keys().cloned().collect();
|
||||
|
||||
// Add any discovered hosts that aren't already configured
|
||||
for host in discovered_hosts {
|
||||
if !all_hosts.contains(&host) {
|
||||
all_hosts.push(host);
|
||||
}
|
||||
}
|
||||
|
||||
// Keep hosts that have pending transitions even if they're offline
|
||||
for (hostname, host_widgets) in &self.host_widgets {
|
||||
if !host_widgets.pending_service_transitions.is_empty() {
|
||||
if !sorted_hosts.contains(hostname) {
|
||||
sorted_hosts.push(hostname.clone());
|
||||
if !all_hosts.contains(hostname) {
|
||||
all_hosts.push(hostname.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sorted_hosts.sort();
|
||||
self.available_hosts = sorted_hosts;
|
||||
all_hosts.sort();
|
||||
self.available_hosts = all_hosts;
|
||||
|
||||
// Get the current hostname (localhost) for auto-selection
|
||||
let localhost = gethostname::gethostname().to_string_lossy().to_string();
|
||||
if !self.available_hosts.is_empty() {
|
||||
if self.available_hosts.contains(&localhost) && !self.user_navigated_away {
|
||||
if self.available_hosts.contains(&self.localhost) && !self.user_navigated_away {
|
||||
// Localhost is available and user hasn't navigated away - switch to it
|
||||
self.current_host = Some(localhost.clone());
|
||||
self.current_host = Some(self.localhost.clone());
|
||||
// Find the actual index of localhost in the sorted list
|
||||
self.host_index = self.available_hosts.iter().position(|h| h == &localhost).unwrap_or(0);
|
||||
self.host_index = self.available_hosts.iter().position(|h| h == &self.localhost).unwrap_or(0);
|
||||
} else if self.current_host.is_none() {
|
||||
// No current host - select first available (which is localhost if available)
|
||||
self.current_host = Some(self.available_hosts[0].clone());
|
||||
@@ -244,22 +251,22 @@ impl TuiApp {
|
||||
KeyCode::Char('r') => {
|
||||
// System rebuild command - works on any panel for current host
|
||||
if let Some(hostname) = self.current_host.clone() {
|
||||
// Create command that shows CM Dashboard logo and then rebuilds
|
||||
let connection_ip = self.get_connection_ip(&hostname);
|
||||
// Create command that shows logo, rebuilds, and waits for user input
|
||||
let logo_and_rebuild = format!(
|
||||
r"cat << 'EOF'
|
||||
NixOS System Rebuild
|
||||
Target: {}
|
||||
|
||||
EOF
|
||||
ssh -tt {}@{} 'bash -ic {}'",
|
||||
"bash -c 'cat << \"EOF\"\nNixOS System Rebuild\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"bash -ic {}\"\necho\necho \"========================================\"\necho \"Rebuild completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'",
|
||||
hostname,
|
||||
connection_ip,
|
||||
self.config.ssh.rebuild_user,
|
||||
hostname,
|
||||
connection_ip,
|
||||
self.config.ssh.rebuild_alias
|
||||
);
|
||||
|
||||
std::process::Command::new("tmux")
|
||||
.arg("display-popup")
|
||||
.arg("split-window")
|
||||
.arg("-v")
|
||||
.arg("-p")
|
||||
.arg("30")
|
||||
.arg(&logo_and_rebuild)
|
||||
.spawn()
|
||||
.ok(); // Ignore errors, tmux will handle them
|
||||
@@ -281,6 +288,53 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
}
|
||||
}
|
||||
}
|
||||
KeyCode::Char('J') => {
|
||||
// Show service logs via journalctl in tmux split window
|
||||
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
|
||||
let connection_ip = self.get_connection_ip(&hostname);
|
||||
let journalctl_command = format!(
|
||||
"bash -c \"ssh -tt {}@{} 'sudo journalctl -u {}.service -f --no-pager -n 50'; exit\"",
|
||||
self.config.ssh.rebuild_user,
|
||||
connection_ip,
|
||||
service_name
|
||||
);
|
||||
|
||||
std::process::Command::new("tmux")
|
||||
.arg("split-window")
|
||||
.arg("-v")
|
||||
.arg("-p")
|
||||
.arg("30")
|
||||
.arg(&journalctl_command)
|
||||
.spawn()
|
||||
.ok(); // Ignore errors, tmux will handle them
|
||||
}
|
||||
}
|
||||
KeyCode::Char('L') => {
|
||||
// Show custom service log file in tmux split window
|
||||
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
|
||||
// Check if this service has a custom log file configured
|
||||
if let Some(host_logs) = self.config.service_logs.get(&hostname) {
|
||||
if let Some(log_config) = host_logs.iter().find(|config| config.service_name == service_name) {
|
||||
let connection_ip = self.get_connection_ip(&hostname);
|
||||
let tail_command = format!(
|
||||
"bash -c \"ssh -tt {}@{} 'sudo tail -n 50 -f {}'; exit\"",
|
||||
self.config.ssh.rebuild_user,
|
||||
connection_ip,
|
||||
log_config.log_file_path
|
||||
);
|
||||
|
||||
std::process::Command::new("tmux")
|
||||
.arg("split-window")
|
||||
.arg("-v")
|
||||
.arg("-p")
|
||||
.arg("30")
|
||||
.arg(&tail_command)
|
||||
.spawn()
|
||||
.ok(); // Ignore errors, tmux will handle them
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
KeyCode::Char('b') => {
|
||||
// Trigger backup
|
||||
if let Some(hostname) = self.current_host.clone() {
|
||||
@@ -288,6 +342,53 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
return Ok(Some(UiCommand::TriggerBackup { hostname }));
|
||||
}
|
||||
}
|
||||
KeyCode::Char('w') => {
|
||||
// Wake on LAN for offline hosts
|
||||
if let Some(hostname) = self.current_host.clone() {
|
||||
// Check if host has MAC address configured
|
||||
if let Some(host_details) = self.config.hosts.get(&hostname) {
|
||||
if let Some(mac_address) = &host_details.mac_address {
|
||||
// Parse MAC address and send WoL packet
|
||||
let mac_bytes = Self::parse_mac_address(mac_address);
|
||||
match mac_bytes {
|
||||
Ok(mac) => {
|
||||
match MagicPacket::new(&mac).send() {
|
||||
Ok(_) => {
|
||||
info!("WakeOnLAN packet sent successfully to {} ({})", hostname, mac_address);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to send WakeOnLAN packet to {}: {}", hostname, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
tracing::error!("Invalid MAC address format for {}: {}", hostname, mac_address);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
KeyCode::Char('t') => {
|
||||
// Open SSH terminal session in tmux window
|
||||
if let Some(hostname) = self.current_host.clone() {
|
||||
let connection_ip = self.get_connection_ip(&hostname);
|
||||
let ssh_command = format!(
|
||||
"ssh -tt {}@{}",
|
||||
self.config.ssh.rebuild_user,
|
||||
connection_ip
|
||||
);
|
||||
|
||||
std::process::Command::new("tmux")
|
||||
.arg("split-window")
|
||||
.arg("-v")
|
||||
.arg("-p")
|
||||
.arg("30") // Use 30% like other commands
|
||||
.arg(&ssh_command)
|
||||
.spawn()
|
||||
.ok(); // Ignore errors, tmux will handle them
|
||||
}
|
||||
}
|
||||
KeyCode::Tab => {
|
||||
// Tab cycles to next host
|
||||
self.navigate_host(1);
|
||||
@@ -336,9 +437,8 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
self.current_host = Some(self.available_hosts[self.host_index].clone());
|
||||
|
||||
// Check if user navigated away from localhost
|
||||
let localhost = gethostname::gethostname().to_string_lossy().to_string();
|
||||
if let Some(ref current) = self.current_host {
|
||||
if current != &localhost {
|
||||
if current != &self.localhost {
|
||||
self.user_navigated_away = true;
|
||||
} else {
|
||||
self.user_navigated_away = false; // User navigated back to localhost
|
||||
@@ -482,6 +582,21 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
])
|
||||
.split(main_chunks[1]); // main_chunks[1] is now the content area (between title and statusbar)
|
||||
|
||||
// Check if current host is offline
|
||||
let current_host_offline = if let Some(hostname) = self.current_host.clone() {
|
||||
self.calculate_host_status(&hostname, metric_store) == Status::Offline
|
||||
} else {
|
||||
true // No host selected is considered offline
|
||||
};
|
||||
|
||||
// If host is offline, render wake-up message instead of panels
|
||||
if current_host_offline {
|
||||
self.render_offline_host_message(frame, main_chunks[1]);
|
||||
self.render_btop_title(frame, main_chunks[0], metric_store);
|
||||
self.render_statusbar(frame, main_chunks[2]);
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if backup panel should be shown
|
||||
let show_backup = if let Some(hostname) = self.current_host.clone() {
|
||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
||||
@@ -549,11 +664,14 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
return;
|
||||
}
|
||||
|
||||
// Calculate worst-case status across all hosts
|
||||
// Calculate worst-case status across all hosts (excluding offline)
|
||||
let mut worst_status = Status::Ok;
|
||||
for host in &self.available_hosts {
|
||||
let host_status = self.calculate_host_status(host, metric_store);
|
||||
worst_status = Status::aggregate(&[worst_status, host_status]);
|
||||
// Don't include offline hosts in status aggregation
|
||||
if host_status != Status::Offline {
|
||||
worst_status = Status::aggregate(&[worst_status, host_status]);
|
||||
}
|
||||
}
|
||||
|
||||
// Use the worst status color as background
|
||||
@@ -631,7 +749,7 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
let metrics = metric_store.get_metrics_for_host(hostname);
|
||||
|
||||
if metrics.is_empty() {
|
||||
return Status::Unknown;
|
||||
return Status::Offline;
|
||||
}
|
||||
|
||||
// First check if we have the aggregated host status summary from the agent
|
||||
@@ -651,7 +769,8 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
Status::Warning => has_warning = true,
|
||||
Status::Pending => has_pending = true,
|
||||
Status::Ok => ok_count += 1,
|
||||
Status::Unknown => {} // Ignore unknown for aggregation
|
||||
Status::Unknown => {}, // Ignore unknown for aggregation
|
||||
Status::Offline => {}, // Ignore offline for aggregation
|
||||
}
|
||||
}
|
||||
|
||||
@@ -686,10 +805,13 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
let mut shortcuts = Vec::new();
|
||||
|
||||
// Global shortcuts
|
||||
shortcuts.push("Tab: Switch Host".to_string());
|
||||
shortcuts.push("↑↓/jk: Select Service".to_string());
|
||||
shortcuts.push("r: Rebuild Host".to_string());
|
||||
shortcuts.push("s/S: Start/Stop Service".to_string());
|
||||
shortcuts.push("Tab: Host".to_string());
|
||||
shortcuts.push("↑↓/jk: Select".to_string());
|
||||
shortcuts.push("r: Rebuild".to_string());
|
||||
shortcuts.push("s/S: Start/Stop".to_string());
|
||||
shortcuts.push("J: Logs".to_string());
|
||||
shortcuts.push("L: Custom".to_string());
|
||||
shortcuts.push("w: Wake".to_string());
|
||||
|
||||
// Always show quit
|
||||
shortcuts.push("q: Quit".to_string());
|
||||
@@ -707,8 +829,10 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
||||
host_widgets.system_scroll_offset
|
||||
};
|
||||
// Clone the config to avoid borrowing issues
|
||||
let config = self.config.clone();
|
||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
||||
host_widgets.system_widget.render_with_scroll(frame, inner_area, scroll_offset, &hostname);
|
||||
host_widgets.system_widget.render_with_scroll(frame, inner_area, scroll_offset, &hostname, Some(&config));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -728,5 +852,100 @@ ssh -tt {}@{} 'bash -ic {}'",
|
||||
}
|
||||
}
|
||||
|
||||
/// Render offline host message with wake-up option
|
||||
fn render_offline_host_message(&self, frame: &mut Frame, area: Rect) {
|
||||
use ratatui::layout::Alignment;
|
||||
use ratatui::style::Modifier;
|
||||
use ratatui::text::{Line, Span};
|
||||
use ratatui::widgets::{Block, Borders, Paragraph};
|
||||
|
||||
// Get hostname for message
|
||||
let hostname = self.current_host.as_ref()
|
||||
.map(|h| h.as_str())
|
||||
.unwrap_or("Unknown");
|
||||
|
||||
// Check if host has MAC address for wake-on-LAN
|
||||
let has_mac = self.current_host.as_ref()
|
||||
.and_then(|hostname| self.config.hosts.get(hostname))
|
||||
.and_then(|details| details.mac_address.as_ref())
|
||||
.is_some();
|
||||
|
||||
// Create message content
|
||||
let mut lines = vec![
|
||||
Line::from(Span::styled(
|
||||
format!("Host '{}' is offline", hostname),
|
||||
Style::default().fg(Theme::muted_text()).add_modifier(Modifier::BOLD),
|
||||
)),
|
||||
Line::from(""),
|
||||
];
|
||||
|
||||
if has_mac {
|
||||
lines.push(Line::from(Span::styled(
|
||||
"Press 'w' to wake up host",
|
||||
Style::default().fg(Theme::primary_text()).add_modifier(Modifier::BOLD),
|
||||
)));
|
||||
} else {
|
||||
lines.push(Line::from(Span::styled(
|
||||
"No MAC address configured - cannot wake up",
|
||||
Style::default().fg(Theme::muted_text()),
|
||||
)));
|
||||
}
|
||||
|
||||
// Create centered message
|
||||
let message = Paragraph::new(lines)
|
||||
.block(Block::default()
|
||||
.borders(Borders::ALL)
|
||||
.border_style(Style::default().fg(Theme::muted_text()))
|
||||
.title(" Offline Host ")
|
||||
.title_style(Style::default().fg(Theme::muted_text()).add_modifier(Modifier::BOLD)))
|
||||
.style(Style::default().bg(Theme::background()).fg(Theme::primary_text()))
|
||||
.alignment(Alignment::Center);
|
||||
|
||||
// Center the message in the available area
|
||||
let popup_area = ratatui::layout::Layout::default()
|
||||
.direction(Direction::Vertical)
|
||||
.constraints([
|
||||
Constraint::Percentage(40),
|
||||
Constraint::Length(6),
|
||||
Constraint::Percentage(40),
|
||||
])
|
||||
.split(area)[1];
|
||||
|
||||
let popup_area = ratatui::layout::Layout::default()
|
||||
.direction(Direction::Horizontal)
|
||||
.constraints([
|
||||
Constraint::Percentage(25),
|
||||
Constraint::Percentage(50),
|
||||
Constraint::Percentage(25),
|
||||
])
|
||||
.split(popup_area)[1];
|
||||
|
||||
frame.render_widget(message, popup_area);
|
||||
}
|
||||
|
||||
/// Parse MAC address string (e.g., "AA:BB:CC:DD:EE:FF") to [u8; 6]
|
||||
/// Get the connection IP for a hostname based on host configuration
|
||||
fn get_connection_ip(&self, hostname: &str) -> String {
|
||||
if let Some(host_details) = self.config.hosts.get(hostname) {
|
||||
host_details.get_connection_ip(hostname)
|
||||
} else {
|
||||
hostname.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_mac_address(mac_str: &str) -> Result<[u8; 6], &'static str> {
|
||||
let parts: Vec<&str> = mac_str.split(':').collect();
|
||||
if parts.len() != 6 {
|
||||
return Err("MAC address must have 6 parts separated by colons");
|
||||
}
|
||||
|
||||
let mut mac = [0u8; 6];
|
||||
for (i, part) in parts.iter().enumerate() {
|
||||
match u8::from_str_radix(part, 16) {
|
||||
Ok(byte) => mac[i] = byte,
|
||||
Err(_) => return Err("Invalid hexadecimal byte in MAC address"),
|
||||
}
|
||||
}
|
||||
Ok(mac)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -147,6 +147,7 @@ impl Theme {
|
||||
Status::Warning => Self::warning(),
|
||||
Status::Critical => Self::error(),
|
||||
Status::Unknown => Self::muted_text(),
|
||||
Status::Offline => Self::muted_text(), // Dark gray for offline
|
||||
}
|
||||
}
|
||||
|
||||
@@ -244,8 +245,9 @@ impl StatusIcons {
|
||||
Status::Ok => "●",
|
||||
Status::Pending => "◉", // Hollow circle for pending
|
||||
Status::Warning => "◐",
|
||||
Status::Critical => "◯",
|
||||
Status::Critical => "!",
|
||||
Status::Unknown => "?",
|
||||
Status::Offline => "○", // Empty circle for offline
|
||||
}
|
||||
}
|
||||
|
||||
@@ -258,6 +260,7 @@ impl StatusIcons {
|
||||
Status::Warning => Theme::warning(), // Yellow
|
||||
Status::Critical => Theme::error(), // Red
|
||||
Status::Unknown => Theme::muted_text(), // Gray
|
||||
Status::Offline => Theme::muted_text(), // Dark gray for offline
|
||||
};
|
||||
|
||||
vec![
|
||||
|
||||
@@ -146,6 +146,7 @@ impl ServicesWidget {
|
||||
Status::Warning => Theme::warning(),
|
||||
Status::Critical => Theme::error(),
|
||||
Status::Unknown => Theme::muted_text(),
|
||||
Status::Offline => Theme::muted_text(),
|
||||
};
|
||||
|
||||
(icon.to_string(), info.status.clone(), status_color)
|
||||
|
||||
@@ -439,7 +439,7 @@ impl Widget for SystemWidget {
|
||||
|
||||
impl SystemWidget {
|
||||
/// Render with scroll offset support
|
||||
pub fn render_with_scroll(&mut self, frame: &mut Frame, area: Rect, scroll_offset: usize, hostname: &str) {
|
||||
pub fn render_with_scroll(&mut self, frame: &mut Frame, area: Rect, scroll_offset: usize, hostname: &str, config: Option<&crate::config::DashboardConfig>) {
|
||||
let mut lines = Vec::new();
|
||||
|
||||
// NixOS section
|
||||
@@ -457,6 +457,16 @@ impl SystemWidget {
|
||||
Span::styled(format!("Agent: {}", agent_version_text), Typography::secondary())
|
||||
]));
|
||||
|
||||
// Display detected connection IP
|
||||
if let Some(config) = config {
|
||||
if let Some(host_details) = config.hosts.get(hostname) {
|
||||
let detected_ip = host_details.get_connection_ip(hostname);
|
||||
lines.push(Line::from(vec![
|
||||
Span::styled(format!("IP: {}", detected_ip), Typography::secondary())
|
||||
]));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// CPU section
|
||||
lines.push(Line::from(vec![
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.45"
|
||||
version = "0.1.71"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -87,6 +87,7 @@ pub enum Status {
|
||||
Warning,
|
||||
Critical,
|
||||
Unknown,
|
||||
Offline,
|
||||
}
|
||||
|
||||
impl Status {
|
||||
@@ -190,6 +191,16 @@ impl HysteresisThresholds {
|
||||
Status::Ok
|
||||
}
|
||||
}
|
||||
Status::Offline => {
|
||||
// Host coming back online, use normal thresholds like first measurement
|
||||
if value >= self.critical_high {
|
||||
Status::Critical
|
||||
} else if value >= self.warning_high {
|
||||
Status::Warning
|
||||
} else {
|
||||
Status::Ok
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user