Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 69892a2d84 | |||
| a928d73134 | |||
| af52d49194 | |||
| bc94f75328 | |||
| b6da71b7e7 | |||
| aaf7edfbce | |||
| bb72c42726 | |||
| af5f96ce2f |
@@ -113,8 +113,8 @@ jobs:
|
||||
NIX_HASH="sha256-$(python3 -c "import base64, binascii; print(base64.b64encode(binascii.unhexlify('$NEW_HASH')).decode())")"
|
||||
|
||||
# Update the NixOS configuration
|
||||
sed -i "s/version = \"v[^\"]*\"/version = \"$VERSION\"/" hosts/common/cm-dashboard.nix
|
||||
sed -i "s/sha256 = \"sha256-[^\"]*\"/sha256 = \"$NIX_HASH\"/" hosts/common/cm-dashboard.nix
|
||||
sed -i "s|version = \"v[^\"]*\"|version = \"$VERSION\"|" hosts/common/cm-dashboard.nix
|
||||
sed -i "s|sha256 = \"sha256-[^\"]*\"|sha256 = \"$NIX_HASH\"|" hosts/common/cm-dashboard.nix
|
||||
|
||||
# Commit and push changes
|
||||
git config user.name "Gitea Actions"
|
||||
|
||||
64
CLAUDE.md
64
CLAUDE.md
@@ -28,18 +28,21 @@ All keyboard navigation and service selection features successfully implemented:
|
||||
- ✅ **Smart Panel Switching**: Only cycles through panels with data (backup panel conditional)
|
||||
- ✅ **Scroll Support**: All panels support content scrolling with proper overflow indicators
|
||||
|
||||
**Current Status - October 25, 2025:**
|
||||
**Current Status - October 26, 2025:**
|
||||
- All keyboard navigation features working correctly ✅
|
||||
- Service selection cursor implemented with focus-aware highlighting ✅
|
||||
- Panel scrolling fixed for System, Services, and Backup panels ✅
|
||||
- Build display working: "Build: 25.05.20251004.3bcc93c" ✅
|
||||
- Configuration hash display: Currently shows git hash, needs to be fixed ❌
|
||||
- Agent version display working: "Agent: 3kvc03nd" ✅
|
||||
- Cross-host version comparison implemented ✅
|
||||
- Automated binary release system working ✅
|
||||
- SMART data consolidated into disk collector ✅
|
||||
|
||||
**Target Layout:**
|
||||
**Current Layout:**
|
||||
```
|
||||
NixOS:
|
||||
Build: 25.05.20251004.3bcc93c
|
||||
Config: d8ivwiar # Should show nix store hash (8 chars) from deployed system
|
||||
Agent: 3kvc03nd # Shows agent version (nix store hash)
|
||||
Active users: cm, simon
|
||||
CPU:
|
||||
● Load: 0.02 0.31 0.86 • 3000MHz
|
||||
@@ -55,7 +58,8 @@ Storage:
|
||||
**System panel layout fully implemented with blue tree symbols ✅**
|
||||
**Tree symbols now use consistent blue theming across all panels ✅**
|
||||
**Overflow handling restored for all widgets ("... and X more") ✅**
|
||||
**Agent hash display working correctly ✅**
|
||||
**Agent version display working correctly ✅**
|
||||
**Cross-host version comparison logging warnings ✅**
|
||||
|
||||
### Current Keyboard Navigation Implementation
|
||||
|
||||
@@ -88,6 +92,56 @@ Storage:
|
||||
- ✅ **Git Clone Approach**: Implemented for nixos-rebuild to avoid directory permissions
|
||||
- ✅ **Visual Feedback**: Directional arrows for service status (↑ starting, ↓ stopping, ↻ restarting)
|
||||
|
||||
### Terminal Popup for Real-time Output - IMPLEMENTED ✅
|
||||
|
||||
**Status (as of 2025-10-26):**
|
||||
- ✅ **Terminal Popup UI**: 80% screen coverage with terminal styling and color-coded output
|
||||
- ✅ **ZMQ Streaming Protocol**: CommandOutputMessage for real-time output transmission
|
||||
- ✅ **Keyboard Controls**: ESC/Q to close, ↑↓ to scroll, manual close (no auto-close)
|
||||
- ✅ **Real-time Display**: Live streaming of command output as it happens
|
||||
- ✅ **Version-based Agent Reporting**: Shows "Agent: v0.1.13" instead of nix store hash
|
||||
|
||||
**Current Implementation Issues:**
|
||||
- ❌ **Agent Process Crashes**: Agent dies during nixos-rebuild execution
|
||||
- ❌ **Inconsistent Output**: Different outputs each time 'R' is pressed
|
||||
- ❌ **Limited Output Visibility**: Not capturing all nixos-rebuild progress
|
||||
|
||||
**PLANNED SOLUTION - Systemd Service Approach:**
|
||||
|
||||
**Problem**: Direct nixos-rebuild execution in agent causes process crashes and inconsistent output.
|
||||
|
||||
**Solution**: Create dedicated systemd service for rebuild operations.
|
||||
|
||||
**Implementation Plan:**
|
||||
1. **NixOS Systemd Service**:
|
||||
```nix
|
||||
systemd.services.cm-rebuild = {
|
||||
description = "CM Dashboard NixOS Rebuild";
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
ExecStart = "${pkgs.nixos-rebuild}/bin/nixos-rebuild switch --flake . --option sandbox false";
|
||||
WorkingDirectory = "/var/lib/cm-dashboard/nixos-config";
|
||||
User = "root";
|
||||
StandardOutput = "journal";
|
||||
StandardError = "journal";
|
||||
};
|
||||
};
|
||||
```
|
||||
|
||||
2. **Agent Modification**:
|
||||
- Replace direct nixos-rebuild execution with: `systemctl start cm-rebuild`
|
||||
- Stream output via: `journalctl -u cm-rebuild -f --no-pager`
|
||||
- Monitor service status for completion detection
|
||||
|
||||
3. **Benefits**:
|
||||
- **Process Isolation**: Service runs independently, won't crash agent
|
||||
- **Consistent Output**: Always same deterministic rebuild process
|
||||
- **Proper Logging**: systemd journal handles all output management
|
||||
- **Resource Management**: systemd manages cleanup and resource limits
|
||||
- **Status Tracking**: Can query service status (running/failed/success)
|
||||
|
||||
**Next Priority**: Implement systemd service approach for reliable rebuild operations.
|
||||
|
||||
**Keyboard Controls Status:**
|
||||
- **Services Panel**:
|
||||
- R (restart) ✅ Working
|
||||
|
||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.0"
|
||||
version = "0.1.13"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@@ -291,7 +291,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.0"
|
||||
version = "0.1.13"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -314,7 +314,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.0"
|
||||
version = "0.1.13"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"serde",
|
||||
|
||||
@@ -152,10 +152,13 @@ interval_seconds = 10
|
||||
memory_warning_mb = 1000.0
|
||||
memory_critical_mb = 2000.0
|
||||
service_name_filters = [
|
||||
"nginx", "postgresql", "redis", "docker", "sshd"
|
||||
"nginx*", "postgresql*", "redis*", "docker*", "sshd*",
|
||||
"gitea*", "immich*", "haasp*", "mosquitto*", "mysql*",
|
||||
"unifi*", "vaultwarden*"
|
||||
]
|
||||
excluded_services = [
|
||||
"nginx-config-reload", "sshd-keygen"
|
||||
"nginx-config-reload", "sshd-keygen", "systemd-",
|
||||
"getty@", "user@", "dbus-", "NetworkManager-"
|
||||
]
|
||||
|
||||
[notifications]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.0"
|
||||
version = "0.1.13"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -9,7 +9,7 @@ use crate::config::AgentConfig;
|
||||
use crate::metrics::MetricCollectionManager;
|
||||
use crate::notifications::NotificationManager;
|
||||
use crate::status::HostStatusManager;
|
||||
use cm_dashboard_shared::{Metric, MetricMessage};
|
||||
use cm_dashboard_shared::{CommandOutputMessage, Metric, MetricMessage, MetricValue, Status};
|
||||
|
||||
pub struct Agent {
|
||||
hostname: String,
|
||||
@@ -162,6 +162,10 @@ impl Agent {
|
||||
let host_status_metric = self.host_status_manager.get_host_status_metric();
|
||||
metrics.push(host_status_metric);
|
||||
|
||||
// Add agent version metric for cross-host version comparison
|
||||
let version_metric = self.get_agent_version_metric();
|
||||
metrics.push(version_metric);
|
||||
|
||||
if metrics.is_empty() {
|
||||
debug!("No metrics to broadcast");
|
||||
return Ok(());
|
||||
@@ -183,6 +187,24 @@ impl Agent {
|
||||
}
|
||||
}
|
||||
|
||||
/// Create agent version metric for cross-host version comparison
|
||||
fn get_agent_version_metric(&self) -> Metric {
|
||||
// Get version from executable path (same logic as main.rs get_version)
|
||||
let version = self.get_agent_version();
|
||||
|
||||
Metric::new(
|
||||
"agent_version".to_string(),
|
||||
MetricValue::String(version),
|
||||
Status::Ok,
|
||||
)
|
||||
}
|
||||
|
||||
/// Get agent version from Cargo package version
|
||||
fn get_agent_version(&self) -> String {
|
||||
// Use the version from Cargo.toml (e.g., "0.1.11")
|
||||
format!("v{}", env!("CARGO_PKG_VERSION"))
|
||||
}
|
||||
|
||||
async fn handle_commands(&mut self) -> Result<()> {
|
||||
// Try to receive commands (non-blocking)
|
||||
match self.zmq_handler.try_receive_command() {
|
||||
@@ -281,73 +303,208 @@ impl Agent {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle NixOS system rebuild commands with git clone approach
|
||||
/// Handle NixOS system rebuild commands with real-time output streaming
|
||||
async fn handle_system_rebuild(&self, git_url: &str, git_branch: &str, working_dir: &str, api_key_file: Option<&str>) -> Result<()> {
|
||||
info!("Starting NixOS system rebuild: {} @ {} -> {}", git_url, git_branch, working_dir);
|
||||
|
||||
let command_id = format!("rebuild_{}", chrono::Utc::now().timestamp());
|
||||
|
||||
// Send initial status
|
||||
self.send_command_output(&command_id, "SystemRebuild", "Starting NixOS system rebuild...").await?;
|
||||
|
||||
// Enable maintenance mode before rebuild
|
||||
let maintenance_file = "/tmp/cm-maintenance";
|
||||
if let Err(e) = tokio::fs::File::create(maintenance_file).await {
|
||||
error!("Failed to create maintenance mode file: {}", e);
|
||||
self.send_command_output(&command_id, "SystemRebuild", &format!("Warning: Failed to create maintenance mode file: {}", e)).await?;
|
||||
} else {
|
||||
info!("Maintenance mode enabled");
|
||||
self.send_command_output(&command_id, "SystemRebuild", "Maintenance mode enabled").await?;
|
||||
}
|
||||
|
||||
// Clone or update repository
|
||||
let git_result = self.ensure_git_repository(git_url, git_branch, working_dir, api_key_file).await;
|
||||
self.send_command_output(&command_id, "SystemRebuild", "Cloning/updating git repository...").await?;
|
||||
let git_result = self.ensure_git_repository_with_output(&command_id, git_url, git_branch, working_dir, api_key_file).await;
|
||||
|
||||
// Execute nixos-rebuild if git operation succeeded - run detached but log output
|
||||
let rebuild_result = if git_result.is_ok() {
|
||||
info!("Git repository ready, executing nixos-rebuild in detached mode");
|
||||
let log_file = std::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open("/var/log/cm-dashboard/nixos-rebuild.log")
|
||||
.map_err(|e| anyhow::anyhow!("Failed to open rebuild log: {}", e))?;
|
||||
if git_result.is_err() {
|
||||
self.send_command_output(&command_id, "SystemRebuild", &format!("Git operation failed: {:?}", git_result)).await?;
|
||||
self.send_command_output_complete(&command_id, "SystemRebuild").await?;
|
||||
return git_result;
|
||||
}
|
||||
|
||||
tokio::process::Command::new("nohup")
|
||||
.arg("sudo")
|
||||
.arg("/run/current-system/sw/bin/nixos-rebuild")
|
||||
.arg("switch")
|
||||
.arg("--option")
|
||||
.arg("sandbox")
|
||||
.arg("false")
|
||||
.arg("--flake")
|
||||
.arg(".")
|
||||
.current_dir(working_dir)
|
||||
.stdin(std::process::Stdio::null())
|
||||
.stdout(std::process::Stdio::from(log_file.try_clone().unwrap()))
|
||||
.stderr(std::process::Stdio::from(log_file))
|
||||
.spawn()
|
||||
} else {
|
||||
return git_result.and_then(|_| unreachable!());
|
||||
};
|
||||
self.send_command_output(&command_id, "SystemRebuild", "Git repository ready, starting nixos-rebuild...").await?;
|
||||
|
||||
// Execute nixos-rebuild with real-time output streaming
|
||||
let rebuild_result = self.execute_nixos_rebuild_with_streaming(&command_id, working_dir).await;
|
||||
|
||||
// Always try to remove maintenance mode file
|
||||
if let Err(e) = tokio::fs::remove_file(maintenance_file).await {
|
||||
if e.kind() != std::io::ErrorKind::NotFound {
|
||||
error!("Failed to remove maintenance mode file: {}", e);
|
||||
self.send_command_output(&command_id, "SystemRebuild", &format!("Warning: Failed to remove maintenance mode file: {}", e)).await?;
|
||||
}
|
||||
} else {
|
||||
info!("Maintenance mode disabled");
|
||||
self.send_command_output(&command_id, "SystemRebuild", "Maintenance mode disabled").await?;
|
||||
}
|
||||
|
||||
// Check rebuild start result
|
||||
// Handle rebuild result
|
||||
match rebuild_result {
|
||||
Ok(_child) => {
|
||||
info!("NixOS rebuild started successfully in background");
|
||||
// Don't wait for completion to avoid agent being killed during rebuild
|
||||
Ok(()) => {
|
||||
self.send_command_output(&command_id, "SystemRebuild", "✓ NixOS rebuild completed successfully!").await?;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to start nixos-rebuild: {}", e);
|
||||
return Err(anyhow::anyhow!("Failed to start nixos-rebuild: {}", e));
|
||||
self.send_command_output(&command_id, "SystemRebuild", &format!("✗ NixOS rebuild failed: {}", e)).await?;
|
||||
}
|
||||
}
|
||||
|
||||
info!("System rebuild completed, triggering metric refresh");
|
||||
// Signal completion
|
||||
self.send_command_output_complete(&command_id, "SystemRebuild").await?;
|
||||
|
||||
info!("System rebuild streaming completed");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Send command output line to dashboard
|
||||
async fn send_command_output(&self, command_id: &str, command_type: &str, output_line: &str) -> Result<()> {
|
||||
let message = CommandOutputMessage::new(
|
||||
self.hostname.clone(),
|
||||
command_id.to_string(),
|
||||
command_type.to_string(),
|
||||
output_line.to_string(),
|
||||
false,
|
||||
);
|
||||
self.zmq_handler.publish_command_output(&message).await
|
||||
}
|
||||
|
||||
/// Send command completion signal to dashboard
|
||||
async fn send_command_output_complete(&self, command_id: &str, command_type: &str) -> Result<()> {
|
||||
let message = CommandOutputMessage::new(
|
||||
self.hostname.clone(),
|
||||
command_id.to_string(),
|
||||
command_type.to_string(),
|
||||
"Command completed".to_string(),
|
||||
true,
|
||||
);
|
||||
self.zmq_handler.publish_command_output(&message).await
|
||||
}
|
||||
|
||||
/// Execute nixos-rebuild via systemd service with journal streaming
|
||||
async fn execute_nixos_rebuild_with_streaming(&self, command_id: &str, _working_dir: &str) -> Result<()> {
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::Command;
|
||||
|
||||
self.send_command_output(command_id, "SystemRebuild", "Starting nixos-rebuild via systemd service...").await?;
|
||||
|
||||
// Start the cm-rebuild systemd service
|
||||
let start_result = Command::new("sudo")
|
||||
.arg("systemctl")
|
||||
.arg("start")
|
||||
.arg("cm-rebuild")
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !start_result.status.success() {
|
||||
let error = String::from_utf8_lossy(&start_result.stderr);
|
||||
return Err(anyhow::anyhow!("Failed to start cm-rebuild service: {}", error));
|
||||
}
|
||||
|
||||
self.send_command_output(command_id, "SystemRebuild", "✓ Service started, streaming output...").await?;
|
||||
|
||||
// Stream journal output in real-time
|
||||
let mut journal_child = Command::new("sudo")
|
||||
.arg("journalctl")
|
||||
.arg("-u")
|
||||
.arg("cm-rebuild")
|
||||
.arg("-f")
|
||||
.arg("--no-pager")
|
||||
.arg("--since")
|
||||
.arg("now")
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()?;
|
||||
|
||||
let stdout = journal_child.stdout.take().expect("Failed to get journalctl stdout");
|
||||
let mut reader = BufReader::new(stdout);
|
||||
let mut lines = reader.lines();
|
||||
|
||||
// Stream journal output and monitor service status
|
||||
let mut service_completed = false;
|
||||
let mut status_check_interval = tokio::time::interval(tokio::time::Duration::from_secs(2));
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
// Read journal output
|
||||
line = lines.next_line() => {
|
||||
match line {
|
||||
Ok(Some(line)) => {
|
||||
// Clean up journal format (remove timestamp/service prefix if needed)
|
||||
let clean_line = self.clean_journal_line(&line);
|
||||
self.send_command_output(command_id, "SystemRebuild", &clean_line).await?;
|
||||
}
|
||||
Ok(None) => {
|
||||
// journalctl stream ended
|
||||
break;
|
||||
}
|
||||
Err(_) => {
|
||||
// Error reading journal
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Periodically check service status
|
||||
_ = status_check_interval.tick() => {
|
||||
if let Ok(status_result) = Command::new("sudo")
|
||||
.arg("systemctl")
|
||||
.arg("is-active")
|
||||
.arg("cm-rebuild")
|
||||
.output()
|
||||
.await
|
||||
{
|
||||
let status = String::from_utf8_lossy(&status_result.stdout).trim().to_string();
|
||||
if status == "inactive" {
|
||||
service_completed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Kill journalctl process
|
||||
let _ = journal_child.kill().await;
|
||||
|
||||
// Check final service result
|
||||
let result = Command::new("sudo")
|
||||
.arg("systemctl")
|
||||
.arg("is-failed")
|
||||
.arg("cm-rebuild")
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
let is_failed = String::from_utf8_lossy(&result.stdout).trim();
|
||||
if is_failed == "failed" {
|
||||
return Err(anyhow::anyhow!("cm-rebuild service failed"));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Clean journal line to remove systemd metadata
|
||||
fn clean_journal_line(&self, line: &str) -> String {
|
||||
// Remove timestamp and service name prefix from journal entries
|
||||
// Example: "Oct 26 10:30:15 cmbox cm-rebuild[1234]: actual output"
|
||||
// Becomes: "actual output"
|
||||
|
||||
if let Some(colon_pos) = line.rfind(": ") {
|
||||
line[colon_pos + 2..].to_string()
|
||||
} else {
|
||||
line.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Ensure git repository with output streaming
|
||||
async fn ensure_git_repository_with_output(&self, command_id: &str, git_url: &str, git_branch: &str, working_dir: &str, api_key_file: Option<&str>) -> Result<()> {
|
||||
// This is a simplified version - we can enhance this later with git output streaming
|
||||
self.ensure_git_repository(git_url, git_branch, working_dir, api_key_file).await
|
||||
}
|
||||
|
||||
/// Ensure git repository is cloned and up to date with force clone approach
|
||||
async fn ensure_git_repository(&self, git_url: &str, git_branch: &str, working_dir: &str, api_key_file: Option<&str>) -> Result<()> {
|
||||
use std::path::Path;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use anyhow::Result;
|
||||
use cm_dashboard_shared::{MessageEnvelope, MetricMessage};
|
||||
use cm_dashboard_shared::{CommandOutputMessage, MessageEnvelope, MetricMessage};
|
||||
use tracing::{debug, info};
|
||||
use zmq::{Context, Socket, SocketType};
|
||||
|
||||
@@ -65,6 +65,24 @@ impl ZmqHandler {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Publish command output message via ZMQ
|
||||
pub async fn publish_command_output(&self, message: &CommandOutputMessage) -> Result<()> {
|
||||
debug!(
|
||||
"Publishing command output for host {} (command: {}): {}",
|
||||
message.hostname,
|
||||
message.command_type,
|
||||
message.output_line
|
||||
);
|
||||
|
||||
let envelope = MessageEnvelope::command_output(message.clone())?;
|
||||
let serialized = serde_json::to_vec(&envelope)?;
|
||||
|
||||
self.publisher.send(&serialized, 0)?;
|
||||
|
||||
debug!("Command output published successfully");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Send heartbeat (placeholder for future use)
|
||||
|
||||
/// Try to receive a command (non-blocking)
|
||||
|
||||
@@ -13,10 +13,26 @@ mod status;
|
||||
|
||||
use agent::Agent;
|
||||
|
||||
/// Get version showing cm-dashboard-agent package hash for easy deployment verification
|
||||
fn get_version() -> &'static str {
|
||||
// Get the path of the current executable
|
||||
let exe_path = std::env::current_exe().expect("Failed to get executable path");
|
||||
let exe_str = exe_path.to_string_lossy();
|
||||
|
||||
// Extract Nix store hash from path like /nix/store/HASH-cm-dashboard-v0.1.8/bin/cm-dashboard-agent
|
||||
let hash_part = exe_str.strip_prefix("/nix/store/").expect("Not a nix store path");
|
||||
let hash = hash_part.split('-').next().expect("Invalid nix store path format");
|
||||
assert!(hash.len() >= 8, "Hash too short");
|
||||
|
||||
// Return first 8 characters of nix store hash
|
||||
let short_hash = hash[..8].to_string();
|
||||
Box::leak(short_hash.into_boxed_str())
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "cm-dashboard-agent")]
|
||||
#[command(about = "CM Dashboard metrics agent with individual metric collection")]
|
||||
#[command(version)]
|
||||
#[command(version = get_version())]
|
||||
struct Cli {
|
||||
/// Increase logging verbosity (-v, -vv)
|
||||
#[arg(short, long, action = clap::ArgAction::Count)]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.0"
|
||||
version = "0.1.13"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -236,6 +236,13 @@ impl Dashboard {
|
||||
self.metric_store
|
||||
.update_metrics(&metric_message.hostname, metric_message.metrics);
|
||||
|
||||
// Check for agent version mismatches across hosts
|
||||
if let Some((current_version, outdated_hosts)) = self.metric_store.get_version_mismatches() {
|
||||
for outdated_host in &outdated_hosts {
|
||||
warn!("Host {} has outdated agent version (current: {})", outdated_host, current_version);
|
||||
}
|
||||
}
|
||||
|
||||
// Update TUI with new hosts and metrics (only if not headless)
|
||||
if let Some(ref mut tui_app) = self.tui_app {
|
||||
let mut connected_hosts = self
|
||||
@@ -261,6 +268,23 @@ impl Dashboard {
|
||||
tui_app.update_metrics(&self.metric_store);
|
||||
}
|
||||
}
|
||||
|
||||
// Also check for command output messages
|
||||
if let Ok(Some(cmd_output)) = self.zmq_consumer.receive_command_output().await {
|
||||
debug!(
|
||||
"Received command output from {}: {}",
|
||||
cmd_output.hostname,
|
||||
cmd_output.output_line
|
||||
);
|
||||
|
||||
// Forward to TUI if not headless
|
||||
if let Some(ref mut tui_app) = self.tui_app {
|
||||
tui_app.add_terminal_output(&cmd_output.hostname, cmd_output.output_line);
|
||||
|
||||
// Note: Popup stays open for manual review - close with ESC/Q
|
||||
}
|
||||
}
|
||||
|
||||
last_metrics_check = Instant::now();
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use anyhow::Result;
|
||||
use cm_dashboard_shared::{MessageEnvelope, MessageType, MetricMessage};
|
||||
use cm_dashboard_shared::{CommandOutputMessage, MessageEnvelope, MessageType, MetricMessage};
|
||||
use tracing::{debug, error, info, warn};
|
||||
use zmq::{Context, Socket, SocketType};
|
||||
|
||||
@@ -103,6 +103,43 @@ impl ZmqConsumer {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Receive command output from any connected agent (non-blocking)
|
||||
pub async fn receive_command_output(&mut self) -> Result<Option<CommandOutputMessage>> {
|
||||
match self.subscriber.recv_bytes(zmq::DONTWAIT) {
|
||||
Ok(data) => {
|
||||
// Deserialize envelope
|
||||
let envelope: MessageEnvelope = serde_json::from_slice(&data)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to deserialize envelope: {}", e))?;
|
||||
|
||||
// Check message type
|
||||
match envelope.message_type {
|
||||
MessageType::CommandOutput => {
|
||||
let cmd_output = envelope
|
||||
.decode_command_output()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to decode command output: {}", e))?;
|
||||
|
||||
debug!(
|
||||
"Received command output from {}: {}",
|
||||
cmd_output.hostname,
|
||||
cmd_output.output_line
|
||||
);
|
||||
|
||||
Ok(Some(cmd_output))
|
||||
}
|
||||
_ => Ok(None), // Not a command output message
|
||||
}
|
||||
}
|
||||
Err(zmq::Error::EAGAIN) => {
|
||||
// No message available (non-blocking mode)
|
||||
Ok(None)
|
||||
}
|
||||
Err(e) => {
|
||||
error!("ZMQ receive error: {}", e);
|
||||
Err(anyhow::anyhow!("ZMQ receive error: {}", e))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Receive metrics from any connected agent (non-blocking)
|
||||
pub async fn receive_metrics(&mut self) -> Result<Option<MetricMessage>> {
|
||||
match self.subscriber.recv_bytes(zmq::DONTWAIT) {
|
||||
@@ -132,6 +169,10 @@ impl ZmqConsumer {
|
||||
debug!("Received heartbeat");
|
||||
Ok(None) // Don't return heartbeats as metrics
|
||||
}
|
||||
MessageType::CommandOutput => {
|
||||
debug!("Received command output (will be handled by receive_command_output)");
|
||||
Ok(None) // Command output handled by separate method
|
||||
}
|
||||
_ => {
|
||||
debug!("Received non-metrics message: {:?}", envelope.message_type);
|
||||
Ok(None)
|
||||
|
||||
@@ -124,4 +124,52 @@ impl MetricStore {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get agent versions from all hosts for cross-host comparison
|
||||
pub fn get_agent_versions(&self) -> HashMap<String, String> {
|
||||
let mut versions = HashMap::new();
|
||||
|
||||
for (hostname, metrics) in &self.current_metrics {
|
||||
if let Some(version_metric) = metrics.get("agent_version") {
|
||||
if let cm_dashboard_shared::MetricValue::String(version) = &version_metric.value {
|
||||
versions.insert(hostname.clone(), version.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
versions
|
||||
}
|
||||
|
||||
/// Check for agent version mismatches across hosts
|
||||
pub fn get_version_mismatches(&self) -> Option<(String, Vec<String>)> {
|
||||
let versions = self.get_agent_versions();
|
||||
|
||||
if versions.len() < 2 {
|
||||
return None; // Need at least 2 hosts to compare
|
||||
}
|
||||
|
||||
// Find the most common version (assume it's the "current" version)
|
||||
let mut version_counts = HashMap::new();
|
||||
for version in versions.values() {
|
||||
*version_counts.entry(version.clone()).or_insert(0) += 1;
|
||||
}
|
||||
|
||||
let most_common_version = version_counts
|
||||
.iter()
|
||||
.max_by_key(|(_, count)| *count)
|
||||
.map(|(version, _)| version.clone())?;
|
||||
|
||||
// Find hosts with different versions
|
||||
let outdated_hosts: Vec<String> = versions
|
||||
.iter()
|
||||
.filter(|(_, version)| *version != &most_common_version)
|
||||
.map(|(hostname, _)| hostname.clone())
|
||||
.collect();
|
||||
|
||||
if outdated_hosts.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some((most_common_version, outdated_hosts))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,6 +92,51 @@ impl HostWidgets {
|
||||
}
|
||||
}
|
||||
|
||||
/// Terminal popup for streaming command output
|
||||
#[derive(Clone)]
|
||||
pub struct TerminalPopup {
|
||||
/// Is the popup currently visible
|
||||
pub visible: bool,
|
||||
/// Command being executed
|
||||
pub command_type: CommandType,
|
||||
/// Target hostname
|
||||
pub hostname: String,
|
||||
/// Target service/operation name
|
||||
pub target: String,
|
||||
/// Output lines collected so far
|
||||
pub output_lines: Vec<String>,
|
||||
/// Scroll offset for the output
|
||||
pub scroll_offset: usize,
|
||||
/// Start time of the operation
|
||||
pub start_time: Instant,
|
||||
}
|
||||
|
||||
impl TerminalPopup {
|
||||
pub fn new(command_type: CommandType, hostname: String, target: String) -> Self {
|
||||
Self {
|
||||
visible: true,
|
||||
command_type,
|
||||
hostname,
|
||||
target,
|
||||
output_lines: Vec::new(),
|
||||
scroll_offset: 0,
|
||||
start_time: Instant::now(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_output_line(&mut self, line: String) {
|
||||
self.output_lines.push(line);
|
||||
// Auto-scroll to bottom when new content arrives
|
||||
if self.output_lines.len() > 20 {
|
||||
self.scroll_offset = self.output_lines.len().saturating_sub(20);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn close(&mut self) {
|
||||
self.visible = false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Main TUI application
|
||||
pub struct TuiApp {
|
||||
/// Widget states per host (hostname -> HostWidgets)
|
||||
@@ -108,6 +153,8 @@ pub struct TuiApp {
|
||||
should_quit: bool,
|
||||
/// Track if user manually navigated away from localhost
|
||||
user_navigated_away: bool,
|
||||
/// Terminal popup for streaming command output
|
||||
terminal_popup: Option<TerminalPopup>,
|
||||
}
|
||||
|
||||
impl TuiApp {
|
||||
@@ -120,6 +167,7 @@ impl TuiApp {
|
||||
focused_panel: PanelType::System, // Start with System panel focused
|
||||
should_quit: false,
|
||||
user_navigated_away: false,
|
||||
terminal_popup: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -178,7 +226,7 @@ impl TuiApp {
|
||||
// Add NixOS metrics - using exact matching for build display fix
|
||||
let nixos_metrics: Vec<&Metric> = all_metrics
|
||||
.iter()
|
||||
.filter(|m| m.name == "system_nixos_build" || m.name == "system_active_users" || m.name == "system_agent_hash")
|
||||
.filter(|m| m.name == "system_nixos_build" || m.name == "system_active_users" || m.name == "agent_version")
|
||||
.copied()
|
||||
.collect();
|
||||
system_metrics.extend(nixos_metrics);
|
||||
@@ -250,6 +298,38 @@ impl TuiApp {
|
||||
/// Handle keyboard input
|
||||
pub fn handle_input(&mut self, event: Event) -> Result<Option<UiCommand>> {
|
||||
if let Event::Key(key) = event {
|
||||
// If terminal popup is visible, handle popup-specific keys first
|
||||
if let Some(ref mut popup) = self.terminal_popup {
|
||||
if popup.visible {
|
||||
match key.code {
|
||||
KeyCode::Esc => {
|
||||
popup.close();
|
||||
self.terminal_popup = None;
|
||||
return Ok(None);
|
||||
}
|
||||
KeyCode::Up => {
|
||||
popup.scroll_offset = popup.scroll_offset.saturating_sub(1);
|
||||
return Ok(None);
|
||||
}
|
||||
KeyCode::Down => {
|
||||
let max_scroll = if popup.output_lines.len() > 20 {
|
||||
popup.output_lines.len() - 20
|
||||
} else {
|
||||
0
|
||||
};
|
||||
popup.scroll_offset = (popup.scroll_offset + 1).min(max_scroll);
|
||||
return Ok(None);
|
||||
}
|
||||
KeyCode::Char('q') => {
|
||||
popup.close();
|
||||
self.terminal_popup = None;
|
||||
return Ok(None);
|
||||
}
|
||||
_ => return Ok(None), // Consume all other keys when popup is open
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match key.code {
|
||||
KeyCode::Char('q') => {
|
||||
self.should_quit = true;
|
||||
@@ -266,6 +346,12 @@ impl TuiApp {
|
||||
// System rebuild command
|
||||
if let Some(hostname) = self.current_host.clone() {
|
||||
self.start_command(&hostname, CommandType::SystemRebuild, hostname.clone());
|
||||
// Open terminal popup for real-time output
|
||||
self.terminal_popup = Some(TerminalPopup::new(
|
||||
CommandType::SystemRebuild,
|
||||
hostname.clone(),
|
||||
"NixOS Rebuild".to_string()
|
||||
));
|
||||
return Ok(Some(UiCommand::SystemRebuild { hostname }));
|
||||
}
|
||||
}
|
||||
@@ -473,6 +559,25 @@ impl TuiApp {
|
||||
}
|
||||
}
|
||||
|
||||
/// Add output line to terminal popup
|
||||
pub fn add_terminal_output(&mut self, hostname: &str, line: String) {
|
||||
if let Some(ref mut popup) = self.terminal_popup {
|
||||
if popup.hostname == hostname && popup.visible {
|
||||
popup.add_output_line(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Close terminal popup for a specific hostname
|
||||
pub fn close_terminal_popup(&mut self, hostname: &str) {
|
||||
if let Some(ref mut popup) = self.terminal_popup {
|
||||
if popup.hostname == hostname {
|
||||
popup.close();
|
||||
self.terminal_popup = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Check for rebuild completion by detecting agent hash changes
|
||||
pub fn check_rebuild_completion(&mut self, metric_store: &MetricStore) {
|
||||
let mut hosts_to_complete = Vec::new();
|
||||
@@ -636,6 +741,13 @@ impl TuiApp {
|
||||
|
||||
// Render statusbar at the bottom
|
||||
self.render_statusbar(frame, main_chunks[2]); // main_chunks[2] is the statusbar area
|
||||
|
||||
// Render terminal popup on top of everything else
|
||||
if let Some(ref popup) = self.terminal_popup {
|
||||
if popup.visible {
|
||||
self.render_terminal_popup(frame, size, popup);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Render btop-style minimal title with host status colors
|
||||
@@ -835,4 +947,112 @@ impl TuiApp {
|
||||
}
|
||||
}
|
||||
|
||||
/// Render terminal popup with streaming output
|
||||
fn render_terminal_popup(&self, frame: &mut Frame, area: Rect, popup: &TerminalPopup) {
|
||||
use ratatui::{
|
||||
style::{Color, Modifier, Style},
|
||||
text::{Line, Span},
|
||||
widgets::{Block, Borders, Clear, Paragraph, Wrap},
|
||||
};
|
||||
|
||||
// Calculate popup size (80% of screen, centered)
|
||||
let popup_width = area.width * 80 / 100;
|
||||
let popup_height = area.height * 80 / 100;
|
||||
let popup_x = (area.width - popup_width) / 2;
|
||||
let popup_y = (area.height - popup_height) / 2;
|
||||
|
||||
let popup_area = Rect {
|
||||
x: popup_x,
|
||||
y: popup_y,
|
||||
width: popup_width,
|
||||
height: popup_height,
|
||||
};
|
||||
|
||||
// Clear background
|
||||
frame.render_widget(Clear, popup_area);
|
||||
|
||||
// Create terminal-style block
|
||||
let title = format!(" {} → {} ({:.1}s) ",
|
||||
popup.hostname,
|
||||
popup.target,
|
||||
popup.start_time.elapsed().as_secs_f32()
|
||||
);
|
||||
|
||||
let block = Block::default()
|
||||
.title(title)
|
||||
.borders(Borders::ALL)
|
||||
.border_style(Style::default().fg(Color::Cyan))
|
||||
.style(Style::default().bg(Color::Black));
|
||||
|
||||
let inner_area = block.inner(popup_area);
|
||||
frame.render_widget(block, popup_area);
|
||||
|
||||
// Render output content
|
||||
let available_height = inner_area.height as usize;
|
||||
let total_lines = popup.output_lines.len();
|
||||
|
||||
// Calculate which lines to show based on scroll offset
|
||||
let start_line = popup.scroll_offset;
|
||||
let end_line = (start_line + available_height).min(total_lines);
|
||||
|
||||
let visible_lines: Vec<Line> = popup.output_lines[start_line..end_line]
|
||||
.iter()
|
||||
.map(|line| {
|
||||
// Style output lines with terminal colors
|
||||
if line.contains("error") || line.contains("Error") || line.contains("failed") {
|
||||
Line::from(Span::styled(line.clone(), Style::default().fg(Color::Red)))
|
||||
} else if line.contains("warning") || line.contains("Warning") {
|
||||
Line::from(Span::styled(line.clone(), Style::default().fg(Color::Yellow)))
|
||||
} else if line.contains("building") || line.contains("Building") {
|
||||
Line::from(Span::styled(line.clone(), Style::default().fg(Color::Blue)))
|
||||
} else if line.contains("✓") || line.contains("success") || line.contains("completed") {
|
||||
Line::from(Span::styled(line.clone(), Style::default().fg(Color::Green)))
|
||||
} else {
|
||||
Line::from(Span::styled(line.clone(), Style::default().fg(Color::White)))
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let content = Paragraph::new(visible_lines)
|
||||
.wrap(Wrap { trim: false })
|
||||
.style(Style::default().bg(Color::Black));
|
||||
|
||||
frame.render_widget(content, inner_area);
|
||||
|
||||
// Render scroll indicator if needed
|
||||
if total_lines > available_height {
|
||||
let scroll_info = format!(" {}% ",
|
||||
if total_lines > 0 {
|
||||
(end_line * 100) / total_lines
|
||||
} else {
|
||||
100
|
||||
}
|
||||
);
|
||||
|
||||
let scroll_area = Rect {
|
||||
x: popup_area.x + popup_area.width - scroll_info.len() as u16 - 1,
|
||||
y: popup_area.y + popup_area.height - 1,
|
||||
width: scroll_info.len() as u16,
|
||||
height: 1,
|
||||
};
|
||||
|
||||
let scroll_widget = Paragraph::new(scroll_info)
|
||||
.style(Style::default().fg(Color::Cyan).bg(Color::Black));
|
||||
frame.render_widget(scroll_widget, scroll_area);
|
||||
}
|
||||
|
||||
// Instructions at bottom
|
||||
let instructions = " ESC/Q: Close • ↑↓: Scroll ";
|
||||
let instructions_area = Rect {
|
||||
x: popup_area.x + 1,
|
||||
y: popup_area.y + popup_area.height - 1,
|
||||
width: instructions.len() as u16,
|
||||
height: 1,
|
||||
};
|
||||
|
||||
let instructions_widget = Paragraph::new(instructions)
|
||||
.style(Style::default().fg(Color::Gray).bg(Color::Black));
|
||||
frame.render_widget(instructions_widget, instructions_area);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -339,9 +339,9 @@ impl Widget for SystemWidget {
|
||||
self.active_users = Some(users.clone());
|
||||
}
|
||||
}
|
||||
"system_agent_hash" => {
|
||||
if let MetricValue::String(hash) = &metric.value {
|
||||
self.agent_hash = Some(hash.clone());
|
||||
"agent_version" => {
|
||||
if let MetricValue::String(version) = &metric.value {
|
||||
self.agent_hash = Some(version.clone());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -422,19 +422,19 @@ impl SystemWidget {
|
||||
Span::styled("NixOS:", Typography::widget_title())
|
||||
]));
|
||||
|
||||
let config_text = self.config_hash.as_deref().unwrap_or("unknown");
|
||||
let build_text = self.nixos_build.as_deref().unwrap_or("unknown");
|
||||
lines.push(Line::from(vec![
|
||||
Span::styled(format!("Build: {}", config_text), Typography::secondary())
|
||||
Span::styled(format!("Build: {}", build_text), Typography::secondary())
|
||||
]));
|
||||
|
||||
let agent_hash_text = self.agent_hash.as_deref().unwrap_or("unknown");
|
||||
let short_hash = if agent_hash_text.len() > 8 && agent_hash_text != "unknown" {
|
||||
&agent_hash_text[..8]
|
||||
} else {
|
||||
agent_hash_text
|
||||
};
|
||||
let agent_version_text = self.agent_hash.as_deref().unwrap_or("unknown");
|
||||
lines.push(Line::from(vec![
|
||||
Span::styled(format!("Agent: {}", short_hash), Typography::secondary())
|
||||
Span::styled(format!("Agent: {}", agent_version_text), Typography::secondary())
|
||||
]));
|
||||
|
||||
let users_text = self.active_users.as_deref().unwrap_or("unknown");
|
||||
lines.push(Line::from(vec![
|
||||
Span::styled(format!("Active users: {}", users_text), Typography::secondary())
|
||||
]));
|
||||
|
||||
// CPU section
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.0"
|
||||
version = "0.1.13"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -9,6 +9,17 @@ pub struct MetricMessage {
|
||||
pub metrics: Vec<Metric>,
|
||||
}
|
||||
|
||||
/// Command output streaming message
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CommandOutputMessage {
|
||||
pub hostname: String,
|
||||
pub command_id: String,
|
||||
pub command_type: String,
|
||||
pub output_line: String,
|
||||
pub is_complete: bool,
|
||||
pub timestamp: u64,
|
||||
}
|
||||
|
||||
impl MetricMessage {
|
||||
pub fn new(hostname: String, metrics: Vec<Metric>) -> Self {
|
||||
Self {
|
||||
@@ -19,6 +30,19 @@ impl MetricMessage {
|
||||
}
|
||||
}
|
||||
|
||||
impl CommandOutputMessage {
|
||||
pub fn new(hostname: String, command_id: String, command_type: String, output_line: String, is_complete: bool) -> Self {
|
||||
Self {
|
||||
hostname,
|
||||
command_id,
|
||||
command_type,
|
||||
output_line,
|
||||
is_complete,
|
||||
timestamp: chrono::Utc::now().timestamp() as u64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Commands that can be sent from dashboard to agent
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub enum Command {
|
||||
@@ -55,6 +79,7 @@ pub enum MessageType {
|
||||
Metrics,
|
||||
Command,
|
||||
CommandResponse,
|
||||
CommandOutput,
|
||||
Heartbeat,
|
||||
}
|
||||
|
||||
@@ -80,6 +105,13 @@ impl MessageEnvelope {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn command_output(message: CommandOutputMessage) -> Result<Self, crate::SharedError> {
|
||||
Ok(Self {
|
||||
message_type: MessageType::CommandOutput,
|
||||
payload: serde_json::to_vec(&message)?,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn heartbeat() -> Result<Self, crate::SharedError> {
|
||||
Ok(Self {
|
||||
message_type: MessageType::Heartbeat,
|
||||
@@ -113,4 +145,13 @@ impl MessageEnvelope {
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_command_output(&self) -> Result<CommandOutputMessage, crate::SharedError> {
|
||||
match self.message_type {
|
||||
MessageType::CommandOutput => Ok(serde_json::from_slice(&self.payload)?),
|
||||
_ => Err(crate::SharedError::Protocol {
|
||||
message: "Expected command output message".to_string(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user