From de252d27b9764541a6b86424d3de23d92cd386b5 Mon Sep 17 00:00:00 2001 From: Christoffer Martinsson Date: Tue, 18 Nov 2025 16:02:15 +0100 Subject: [PATCH] Migrate service control from ZMQ to SSH with real-time progress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace ZMQ-based service start/stop commands with SSH execution in tmux popups. This provides better user feedback with real-time systemctl output while eliminating blocking operations from the main message processing loop. Changes: - Service start/stop now use SSH with progress display - Added backup functionality with 'B' key - Preserved transitional icons (↑/↓) for immediate visual feedback - Removed all ZMQ service control commands and handlers - Updated configuration to include backup_alias setting - All operations (rebuild, backup, services) now use consistent SSH interface This ensures stable heartbeat processing while providing superior user experience with live command output and service status feedback. --- CLAUDE.md | 4 ++ Cargo.lock | 6 +-- README.md | 7 ++- agent/Cargo.toml | 2 +- agent/src/agent.rs | 71 +------------------------- agent/src/communication/mod.rs | 15 ------ dashboard/Cargo.toml | 2 +- dashboard/src/app.rs | 28 +--------- dashboard/src/communication/mod.rs | 73 -------------------------- dashboard/src/config/mod.rs | 3 +- dashboard/src/ui/mod.rs | 82 ++++++++++++++++++++++++++---- shared/Cargo.toml | 2 +- 12 files changed, 91 insertions(+), 204 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 4c67aed..938b76e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -49,8 +49,12 @@ hostname2 = [ ### Navigation - **Tab**: Switch between hosts - **↑↓ or j/k**: Select services +- **s**: Start selected service (UserStart) +- **S**: Stop selected service (UserStop) - **J**: Show service logs (journalctl) - **L**: Show custom log files +- **R**: Rebuild current host +- **B**: Run backup on current host - **q**: Quit dashboard ## Core Architecture Principles diff --git a/Cargo.lock b/Cargo.lock index e7c913b..6f7e520 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "cm-dashboard" -version = "0.1.72" +version = "0.1.73" dependencies = [ "anyhow", "chrono", @@ -292,7 +292,7 @@ dependencies = [ [[package]] name = "cm-dashboard-agent" -version = "0.1.72" +version = "0.1.73" dependencies = [ "anyhow", "async-trait", @@ -315,7 +315,7 @@ dependencies = [ [[package]] name = "cm-dashboard-shared" -version = "0.1.72" +version = "0.1.73" dependencies = [ "chrono", "serde", diff --git a/README.md b/README.md index 22a5863..84e2921 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,9 @@ cm-dashboard • ● cmbox ● srv01 ● srv02 ● steambox - **s**: Start selected service (UserStart) - **S**: Stop selected service (UserStop) - **J**: Show service logs (journalctl in tmux popup) +- **L**: Show custom log files (tail -f custom paths in tmux popup) - **R**: Rebuild current host +- **B**: Run backup on current host - **q**: Quit ### Status Indicators @@ -173,9 +175,10 @@ subscriber_ports = [6130] [hosts] predefined_hosts = ["cmbox", "srv01", "srv02"] -[ui] -ssh_user = "cm" +[ssh] +rebuild_user = "cm" rebuild_alias = "nixos-rebuild-cmtec" +backup_alias = "cm-backup-run" ``` ## Technical Implementation diff --git a/agent/Cargo.toml b/agent/Cargo.toml index 9f02748..7dd43f8 100644 --- a/agent/Cargo.toml +++ b/agent/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard-agent" -version = "0.1.73" +version = "0.1.74" edition = "2021" [dependencies] diff --git a/agent/src/agent.rs b/agent/src/agent.rs index 0ba4422..366a465 100644 --- a/agent/src/agent.rs +++ b/agent/src/agent.rs @@ -4,7 +4,7 @@ use std::time::Duration; use tokio::time::interval; use tracing::{debug, error, info}; -use crate::communication::{AgentCommand, ServiceAction, ZmqHandler}; +use crate::communication::{AgentCommand, ZmqHandler}; use crate::config::AgentConfig; use crate::metrics::MetricCollectionManager; use crate::notifications::NotificationManager; @@ -315,79 +315,10 @@ impl Agent { info!("Processing Ping command - agent is alive"); // Could send a response back via ZMQ if needed } - AgentCommand::ServiceControl { service_name, action } => { - info!("Processing ServiceControl command: {} {:?}", service_name, action); - if let Err(e) = self.handle_service_control(&service_name, &action).await { - error!("Failed to execute service control: {}", e); - } - } } Ok(()) } - /// Handle systemd service control commands - async fn handle_service_control(&mut self, service_name: &str, action: &ServiceAction) -> Result<()> { - let (action_str, is_user_action) = match action { - ServiceAction::Start => ("start", false), - ServiceAction::Stop => ("stop", false), - ServiceAction::Status => ("status", false), - ServiceAction::UserStart => ("start", true), - ServiceAction::UserStop => ("stop", true), - }; - - info!("Executing systemctl {} {} (user action: {})", action_str, service_name, is_user_action); - - // Handle user-stopped service tracking before systemctl execution (stop only) - match action { - ServiceAction::UserStop => { - info!("Marking service '{}' as user-stopped", service_name); - if let Err(e) = self.service_tracker.mark_user_stopped(service_name) { - error!("Failed to mark service as user-stopped: {}", e); - } else { - // Sync to global tracker - UserStoppedServiceTracker::update_global(&self.service_tracker); - } - } - _ => {} - } - - // Spawn the systemctl command asynchronously to avoid blocking the agent - let service_name_clone = service_name.to_string(); - let action_str_clone = action_str.to_string(); - - tokio::spawn(async move { - let result = tokio::process::Command::new("sudo") - .arg("systemctl") - .arg(&action_str_clone) - .arg(format!("{}.service", service_name_clone)) - .output() - .await; - - match result { - Ok(output) => { - if output.status.success() { - info!("Service {} {} completed successfully", service_name_clone, action_str_clone); - if !output.stdout.is_empty() { - debug!("stdout: {}", String::from_utf8_lossy(&output.stdout)); - } - } else { - let stderr = String::from_utf8_lossy(&output.stderr); - error!("Service {} {} failed: {}", service_name_clone, action_str_clone, stderr); - } - } - Err(e) => { - error!("Failed to execute systemctl {} {}: {}", action_str_clone, service_name_clone, e); - } - } - }); - - info!("Service {} {} command initiated (non-blocking)", service_name, action_str); - - // Note: Service status will be updated by the normal metric collection cycle - // once the systemctl operation completes - - Ok(()) - } /// Check metrics for user-stopped services that are now active and clear their flags fn clear_user_stopped_flags_for_active_services(&mut self, metrics: &[Metric]) { diff --git a/agent/src/communication/mod.rs b/agent/src/communication/mod.rs index 6c2b4bd..e94dcab 100644 --- a/agent/src/communication/mod.rs +++ b/agent/src/communication/mod.rs @@ -98,19 +98,4 @@ pub enum AgentCommand { ToggleCollector { name: String, enabled: bool }, /// Request status/health check Ping, - /// Control systemd service - ServiceControl { - service_name: String, - action: ServiceAction, - }, -} - -/// Service control actions -#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] -pub enum ServiceAction { - Start, - Stop, - Status, - UserStart, // User-initiated start (clears user-stopped flag) - UserStop, // User-initiated stop (marks as user-stopped) } diff --git a/dashboard/Cargo.toml b/dashboard/Cargo.toml index f698968..a44fc36 100644 --- a/dashboard/Cargo.toml +++ b/dashboard/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard" -version = "0.1.73" +version = "0.1.74" edition = "2021" [dependencies] diff --git a/dashboard/src/app.rs b/dashboard/src/app.rs index f803be0..d3ffd38 100644 --- a/dashboard/src/app.rs +++ b/dashboard/src/app.rs @@ -9,14 +9,13 @@ use std::io; use std::time::{Duration, Instant}; use tracing::{debug, error, info, warn}; -use crate::communication::{AgentCommand, ServiceAction, ZmqCommandSender, ZmqConsumer}; +use crate::communication::{ZmqConsumer}; use crate::config::DashboardConfig; use crate::metrics::MetricStore; use crate::ui::{TuiApp, UiCommand}; pub struct Dashboard { zmq_consumer: ZmqConsumer, - zmq_command_sender: ZmqCommandSender, metric_store: MetricStore, tui_app: Option, terminal: Option>>, @@ -58,14 +57,6 @@ impl Dashboard { } }; - // Initialize ZMQ command sender - let zmq_command_sender = match ZmqCommandSender::new(&config.zmq) { - Ok(sender) => sender, - Err(e) => { - error!("Failed to initialize ZMQ command sender: {}", e); - return Err(e); - } - }; // Try to connect to hosts but don't fail if none are available match zmq_consumer.connect_to_predefined_hosts(&config.hosts).await { @@ -124,7 +115,6 @@ impl Dashboard { Ok(Self { zmq_consumer, - zmq_command_sender, metric_store, tui_app, terminal, @@ -290,22 +280,6 @@ impl Dashboard { /// Execute a UI command by sending it to the appropriate agent async fn execute_ui_command(&self, command: UiCommand) -> Result<()> { match command { - UiCommand::ServiceStart { hostname, service_name } => { - info!("Sending user start command for service {} on {}", service_name, hostname); - let agent_command = AgentCommand::ServiceControl { - service_name: service_name.clone(), - action: ServiceAction::UserStart, - }; - self.zmq_command_sender.send_command(&hostname, agent_command).await?; - } - UiCommand::ServiceStop { hostname, service_name } => { - info!("Sending user stop command for service {} on {}", service_name, hostname); - let agent_command = AgentCommand::ServiceControl { - service_name: service_name.clone(), - action: ServiceAction::UserStop, - }; - self.zmq_command_sender.send_command(&hostname, agent_command).await?; - } UiCommand::TriggerBackup { hostname } => { info!("Trigger backup requested for {}", hostname); // TODO: Implement backup trigger command diff --git a/dashboard/src/communication/mod.rs b/dashboard/src/communication/mod.rs index 18ff944..2646a9b 100644 --- a/dashboard/src/communication/mod.rs +++ b/dashboard/src/communication/mod.rs @@ -5,40 +5,6 @@ use zmq::{Context, Socket, SocketType}; use crate::config::ZmqConfig; -/// Commands that can be sent to agents -#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] -pub enum AgentCommand { - /// Request immediate metric collection - CollectNow, - /// Change collection interval - SetInterval { seconds: u64 }, - /// Enable/disable a collector - ToggleCollector { name: String, enabled: bool }, - /// Request status/health check - Ping, - /// Control systemd service - ServiceControl { - service_name: String, - action: ServiceAction, - }, - /// Rebuild NixOS system - SystemRebuild { - git_url: String, - git_branch: String, - working_dir: String, - api_key_file: Option, - }, -} - -/// Service control actions -#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] -pub enum ServiceAction { - Start, - Stop, - Status, - UserStart, // User-initiated start (clears user-stopped flag) - UserStop, // User-initiated stop (marks as user-stopped) -} /// ZMQ consumer for receiving metrics from agents pub struct ZmqConsumer { @@ -202,42 +168,3 @@ impl ZmqConsumer { } } -/// ZMQ command sender for sending commands to agents -pub struct ZmqCommandSender { - context: Context, -} - -impl ZmqCommandSender { - pub fn new(_config: &ZmqConfig) -> Result { - let context = Context::new(); - - info!("ZMQ command sender initialized"); - - Ok(Self { context }) - } - - /// Send a command to a specific agent - pub async fn send_command(&self, hostname: &str, command: AgentCommand) -> Result<()> { - // Create a new PUSH socket for this command (ZMQ best practice) - let socket = self.context.socket(SocketType::PUSH)?; - - // Set socket options - socket.set_linger(1000)?; // Wait up to 1 second on close - socket.set_sndtimeo(5000)?; // 5 second send timeout - - // Connect to agent's command port (6131) - let address = format!("tcp://{}:6131", hostname); - socket.connect(&address)?; - - // Serialize command - let serialized = serde_json::to_vec(&command)?; - - // Send command - socket.send(&serialized, 0)?; - - info!("Sent command {:?} to agent at {}", command, hostname); - - // Socket will be automatically closed when dropped - Ok(()) - } -} diff --git a/dashboard/src/config/mod.rs b/dashboard/src/config/mod.rs index cfd6a8d..10d9bc7 100644 --- a/dashboard/src/config/mod.rs +++ b/dashboard/src/config/mod.rs @@ -51,11 +51,12 @@ pub struct SystemConfig { pub nixos_config_api_key_file: Option, } -/// SSH configuration for rebuild operations +/// SSH configuration for rebuild and backup operations #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SshConfig { pub rebuild_user: String, pub rebuild_alias: String, + pub backup_alias: String, } /// Service log file configuration per host diff --git a/dashboard/src/ui/mod.rs b/dashboard/src/ui/mod.rs index fbee6f3..523f745 100644 --- a/dashboard/src/ui/mod.rs +++ b/dashboard/src/ui/mod.rs @@ -23,8 +23,6 @@ use widgets::{BackupWidget, ServicesWidget, SystemWidget, Widget}; /// Commands that can be triggered from the UI #[derive(Debug, Clone)] pub enum UiCommand { - ServiceStart { hostname: String, service_name: String }, - ServiceStop { hostname: String, service_name: String }, TriggerBackup { hostname: String }, } @@ -272,20 +270,84 @@ impl TuiApp { .ok(); // Ignore errors, tmux will handle them } } + KeyCode::Char('B') => { + // Backup command - works on any panel for current host + if let Some(hostname) = self.current_host.clone() { + let connection_ip = self.get_connection_ip(&hostname); + // Create command that shows logo, runs backup, and waits for user input + let logo_and_backup = format!( + "bash -c 'cat << \"EOF\"\nBackup Operation\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"bash -ic {}\"\necho\necho \"========================================\"\necho \"Backup completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'", + hostname, + connection_ip, + self.config.ssh.rebuild_user, + connection_ip, + self.config.ssh.backup_alias + ); + + std::process::Command::new("tmux") + .arg("split-window") + .arg("-v") + .arg("-p") + .arg("30") + .arg(&logo_and_backup) + .spawn() + .ok(); // Ignore errors, tmux will handle them + } + } KeyCode::Char('s') => { - // Service start command + // Service start command via SSH with progress display if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) { - if self.start_command(&hostname, CommandType::ServiceStart, service_name.clone()) { - return Ok(Some(UiCommand::ServiceStart { hostname, service_name })); - } + // Start transition tracking for visual feedback + self.start_command(&hostname, CommandType::ServiceStart, service_name.clone()); + + let connection_ip = self.get_connection_ip(&hostname); + let service_start_command = format!( + "bash -c 'cat << \"EOF\"\nService Start: {}.service\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"sudo systemctl start {}.service && echo \\\"Service started successfully\\\" && sudo systemctl status {}.service --no-pager -l\"\necho\necho \"========================================\"\necho \"Operation completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'", + service_name, + hostname, + connection_ip, + self.config.ssh.rebuild_user, + connection_ip, + service_name, + service_name + ); + + std::process::Command::new("tmux") + .arg("split-window") + .arg("-v") + .arg("-p") + .arg("30") + .arg(&service_start_command) + .spawn() + .ok(); // Ignore errors, tmux will handle them } } KeyCode::Char('S') => { - // Service stop command + // Service stop command via SSH with progress display if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) { - if self.start_command(&hostname, CommandType::ServiceStop, service_name.clone()) { - return Ok(Some(UiCommand::ServiceStop { hostname, service_name })); - } + // Start transition tracking for visual feedback + self.start_command(&hostname, CommandType::ServiceStop, service_name.clone()); + + let connection_ip = self.get_connection_ip(&hostname); + let service_stop_command = format!( + "bash -c 'cat << \"EOF\"\nService Stop: {}.service\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"sudo systemctl stop {}.service && echo \\\"Service stopped successfully\\\" && sudo systemctl status {}.service --no-pager -l\"\necho\necho \"========================================\"\necho \"Operation completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'", + service_name, + hostname, + connection_ip, + self.config.ssh.rebuild_user, + connection_ip, + service_name, + service_name + ); + + std::process::Command::new("tmux") + .arg("split-window") + .arg("-v") + .arg("-p") + .arg("30") + .arg(&service_stop_command) + .spawn() + .ok(); // Ignore errors, tmux will handle them } } KeyCode::Char('J') => { diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 1e95f72..e65d13e 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard-shared" -version = "0.1.73" +version = "0.1.74" edition = "2021" [dependencies]