Migrate service control from ZMQ to SSH with real-time progress
All checks were successful
Build and Release / build-and-release (push) Successful in 2m34s

Replace ZMQ-based service start/stop commands with SSH execution in tmux
popups. This provides better user feedback with real-time systemctl output
while eliminating blocking operations from the main message processing loop.

Changes:
- Service start/stop now use SSH with progress display
- Added backup functionality with 'B' key
- Preserved transitional icons (↑/↓) for immediate visual feedback
- Removed all ZMQ service control commands and handlers
- Updated configuration to include backup_alias setting
- All operations (rebuild, backup, services) now use consistent SSH interface

This ensures stable heartbeat processing while providing superior user
experience with live command output and service status feedback.
This commit is contained in:
Christoffer Martinsson 2025-11-18 16:02:15 +01:00
parent db0e41a7d3
commit de252d27b9
12 changed files with 91 additions and 204 deletions

View File

@ -49,8 +49,12 @@ hostname2 = [
### Navigation
- **Tab**: Switch between hosts
- **↑↓ or j/k**: Select services
- **s**: Start selected service (UserStart)
- **S**: Stop selected service (UserStop)
- **J**: Show service logs (journalctl)
- **L**: Show custom log files
- **R**: Rebuild current host
- **B**: Run backup on current host
- **q**: Quit dashboard
## Core Architecture Principles

6
Cargo.lock generated
View File

@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
[[package]]
name = "cm-dashboard"
version = "0.1.72"
version = "0.1.73"
dependencies = [
"anyhow",
"chrono",
@ -292,7 +292,7 @@ dependencies = [
[[package]]
name = "cm-dashboard-agent"
version = "0.1.72"
version = "0.1.73"
dependencies = [
"anyhow",
"async-trait",
@ -315,7 +315,7 @@ dependencies = [
[[package]]
name = "cm-dashboard-shared"
version = "0.1.72"
version = "0.1.73"
dependencies = [
"chrono",
"serde",

View File

@ -88,7 +88,9 @@ cm-dashboard • ● cmbox ● srv01 ● srv02 ● steambox
- **s**: Start selected service (UserStart)
- **S**: Stop selected service (UserStop)
- **J**: Show service logs (journalctl in tmux popup)
- **L**: Show custom log files (tail -f custom paths in tmux popup)
- **R**: Rebuild current host
- **B**: Run backup on current host
- **q**: Quit
### Status Indicators
@ -173,9 +175,10 @@ subscriber_ports = [6130]
[hosts]
predefined_hosts = ["cmbox", "srv01", "srv02"]
[ui]
ssh_user = "cm"
[ssh]
rebuild_user = "cm"
rebuild_alias = "nixos-rebuild-cmtec"
backup_alias = "cm-backup-run"
```
## Technical Implementation

View File

@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-agent"
version = "0.1.73"
version = "0.1.74"
edition = "2021"
[dependencies]

View File

@ -4,7 +4,7 @@ use std::time::Duration;
use tokio::time::interval;
use tracing::{debug, error, info};
use crate::communication::{AgentCommand, ServiceAction, ZmqHandler};
use crate::communication::{AgentCommand, ZmqHandler};
use crate::config::AgentConfig;
use crate::metrics::MetricCollectionManager;
use crate::notifications::NotificationManager;
@ -315,79 +315,10 @@ impl Agent {
info!("Processing Ping command - agent is alive");
// Could send a response back via ZMQ if needed
}
AgentCommand::ServiceControl { service_name, action } => {
info!("Processing ServiceControl command: {} {:?}", service_name, action);
if let Err(e) = self.handle_service_control(&service_name, &action).await {
error!("Failed to execute service control: {}", e);
}
}
}
Ok(())
}
/// Handle systemd service control commands
async fn handle_service_control(&mut self, service_name: &str, action: &ServiceAction) -> Result<()> {
let (action_str, is_user_action) = match action {
ServiceAction::Start => ("start", false),
ServiceAction::Stop => ("stop", false),
ServiceAction::Status => ("status", false),
ServiceAction::UserStart => ("start", true),
ServiceAction::UserStop => ("stop", true),
};
info!("Executing systemctl {} {} (user action: {})", action_str, service_name, is_user_action);
// Handle user-stopped service tracking before systemctl execution (stop only)
match action {
ServiceAction::UserStop => {
info!("Marking service '{}' as user-stopped", service_name);
if let Err(e) = self.service_tracker.mark_user_stopped(service_name) {
error!("Failed to mark service as user-stopped: {}", e);
} else {
// Sync to global tracker
UserStoppedServiceTracker::update_global(&self.service_tracker);
}
}
_ => {}
}
// Spawn the systemctl command asynchronously to avoid blocking the agent
let service_name_clone = service_name.to_string();
let action_str_clone = action_str.to_string();
tokio::spawn(async move {
let result = tokio::process::Command::new("sudo")
.arg("systemctl")
.arg(&action_str_clone)
.arg(format!("{}.service", service_name_clone))
.output()
.await;
match result {
Ok(output) => {
if output.status.success() {
info!("Service {} {} completed successfully", service_name_clone, action_str_clone);
if !output.stdout.is_empty() {
debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
}
} else {
let stderr = String::from_utf8_lossy(&output.stderr);
error!("Service {} {} failed: {}", service_name_clone, action_str_clone, stderr);
}
}
Err(e) => {
error!("Failed to execute systemctl {} {}: {}", action_str_clone, service_name_clone, e);
}
}
});
info!("Service {} {} command initiated (non-blocking)", service_name, action_str);
// Note: Service status will be updated by the normal metric collection cycle
// once the systemctl operation completes
Ok(())
}
/// Check metrics for user-stopped services that are now active and clear their flags
fn clear_user_stopped_flags_for_active_services(&mut self, metrics: &[Metric]) {

View File

@ -98,19 +98,4 @@ pub enum AgentCommand {
ToggleCollector { name: String, enabled: bool },
/// Request status/health check
Ping,
/// Control systemd service
ServiceControl {
service_name: String,
action: ServiceAction,
},
}
/// Service control actions
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub enum ServiceAction {
Start,
Stop,
Status,
UserStart, // User-initiated start (clears user-stopped flag)
UserStop, // User-initiated stop (marks as user-stopped)
}

View File

@ -1,6 +1,6 @@
[package]
name = "cm-dashboard"
version = "0.1.73"
version = "0.1.74"
edition = "2021"
[dependencies]

View File

@ -9,14 +9,13 @@ use std::io;
use std::time::{Duration, Instant};
use tracing::{debug, error, info, warn};
use crate::communication::{AgentCommand, ServiceAction, ZmqCommandSender, ZmqConsumer};
use crate::communication::{ZmqConsumer};
use crate::config::DashboardConfig;
use crate::metrics::MetricStore;
use crate::ui::{TuiApp, UiCommand};
pub struct Dashboard {
zmq_consumer: ZmqConsumer,
zmq_command_sender: ZmqCommandSender,
metric_store: MetricStore,
tui_app: Option<TuiApp>,
terminal: Option<Terminal<CrosstermBackend<io::Stdout>>>,
@ -58,14 +57,6 @@ impl Dashboard {
}
};
// Initialize ZMQ command sender
let zmq_command_sender = match ZmqCommandSender::new(&config.zmq) {
Ok(sender) => sender,
Err(e) => {
error!("Failed to initialize ZMQ command sender: {}", e);
return Err(e);
}
};
// Try to connect to hosts but don't fail if none are available
match zmq_consumer.connect_to_predefined_hosts(&config.hosts).await {
@ -124,7 +115,6 @@ impl Dashboard {
Ok(Self {
zmq_consumer,
zmq_command_sender,
metric_store,
tui_app,
terminal,
@ -290,22 +280,6 @@ impl Dashboard {
/// Execute a UI command by sending it to the appropriate agent
async fn execute_ui_command(&self, command: UiCommand) -> Result<()> {
match command {
UiCommand::ServiceStart { hostname, service_name } => {
info!("Sending user start command for service {} on {}", service_name, hostname);
let agent_command = AgentCommand::ServiceControl {
service_name: service_name.clone(),
action: ServiceAction::UserStart,
};
self.zmq_command_sender.send_command(&hostname, agent_command).await?;
}
UiCommand::ServiceStop { hostname, service_name } => {
info!("Sending user stop command for service {} on {}", service_name, hostname);
let agent_command = AgentCommand::ServiceControl {
service_name: service_name.clone(),
action: ServiceAction::UserStop,
};
self.zmq_command_sender.send_command(&hostname, agent_command).await?;
}
UiCommand::TriggerBackup { hostname } => {
info!("Trigger backup requested for {}", hostname);
// TODO: Implement backup trigger command

View File

@ -5,40 +5,6 @@ use zmq::{Context, Socket, SocketType};
use crate::config::ZmqConfig;
/// Commands that can be sent to agents
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub enum AgentCommand {
/// Request immediate metric collection
CollectNow,
/// Change collection interval
SetInterval { seconds: u64 },
/// Enable/disable a collector
ToggleCollector { name: String, enabled: bool },
/// Request status/health check
Ping,
/// Control systemd service
ServiceControl {
service_name: String,
action: ServiceAction,
},
/// Rebuild NixOS system
SystemRebuild {
git_url: String,
git_branch: String,
working_dir: String,
api_key_file: Option<String>,
},
}
/// Service control actions
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub enum ServiceAction {
Start,
Stop,
Status,
UserStart, // User-initiated start (clears user-stopped flag)
UserStop, // User-initiated stop (marks as user-stopped)
}
/// ZMQ consumer for receiving metrics from agents
pub struct ZmqConsumer {
@ -202,42 +168,3 @@ impl ZmqConsumer {
}
}
/// ZMQ command sender for sending commands to agents
pub struct ZmqCommandSender {
context: Context,
}
impl ZmqCommandSender {
pub fn new(_config: &ZmqConfig) -> Result<Self> {
let context = Context::new();
info!("ZMQ command sender initialized");
Ok(Self { context })
}
/// Send a command to a specific agent
pub async fn send_command(&self, hostname: &str, command: AgentCommand) -> Result<()> {
// Create a new PUSH socket for this command (ZMQ best practice)
let socket = self.context.socket(SocketType::PUSH)?;
// Set socket options
socket.set_linger(1000)?; // Wait up to 1 second on close
socket.set_sndtimeo(5000)?; // 5 second send timeout
// Connect to agent's command port (6131)
let address = format!("tcp://{}:6131", hostname);
socket.connect(&address)?;
// Serialize command
let serialized = serde_json::to_vec(&command)?;
// Send command
socket.send(&serialized, 0)?;
info!("Sent command {:?} to agent at {}", command, hostname);
// Socket will be automatically closed when dropped
Ok(())
}
}

View File

@ -51,11 +51,12 @@ pub struct SystemConfig {
pub nixos_config_api_key_file: Option<String>,
}
/// SSH configuration for rebuild operations
/// SSH configuration for rebuild and backup operations
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SshConfig {
pub rebuild_user: String,
pub rebuild_alias: String,
pub backup_alias: String,
}
/// Service log file configuration per host

View File

@ -23,8 +23,6 @@ use widgets::{BackupWidget, ServicesWidget, SystemWidget, Widget};
/// Commands that can be triggered from the UI
#[derive(Debug, Clone)]
pub enum UiCommand {
ServiceStart { hostname: String, service_name: String },
ServiceStop { hostname: String, service_name: String },
TriggerBackup { hostname: String },
}
@ -272,20 +270,84 @@ impl TuiApp {
.ok(); // Ignore errors, tmux will handle them
}
}
KeyCode::Char('B') => {
// Backup command - works on any panel for current host
if let Some(hostname) = self.current_host.clone() {
let connection_ip = self.get_connection_ip(&hostname);
// Create command that shows logo, runs backup, and waits for user input
let logo_and_backup = format!(
"bash -c 'cat << \"EOF\"\nBackup Operation\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"bash -ic {}\"\necho\necho \"========================================\"\necho \"Backup completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'",
hostname,
connection_ip,
self.config.ssh.rebuild_user,
connection_ip,
self.config.ssh.backup_alias
);
std::process::Command::new("tmux")
.arg("split-window")
.arg("-v")
.arg("-p")
.arg("30")
.arg(&logo_and_backup)
.spawn()
.ok(); // Ignore errors, tmux will handle them
}
}
KeyCode::Char('s') => {
// Service start command
// Service start command via SSH with progress display
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
if self.start_command(&hostname, CommandType::ServiceStart, service_name.clone()) {
return Ok(Some(UiCommand::ServiceStart { hostname, service_name }));
}
// Start transition tracking for visual feedback
self.start_command(&hostname, CommandType::ServiceStart, service_name.clone());
let connection_ip = self.get_connection_ip(&hostname);
let service_start_command = format!(
"bash -c 'cat << \"EOF\"\nService Start: {}.service\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"sudo systemctl start {}.service && echo \\\"Service started successfully\\\" && sudo systemctl status {}.service --no-pager -l\"\necho\necho \"========================================\"\necho \"Operation completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'",
service_name,
hostname,
connection_ip,
self.config.ssh.rebuild_user,
connection_ip,
service_name,
service_name
);
std::process::Command::new("tmux")
.arg("split-window")
.arg("-v")
.arg("-p")
.arg("30")
.arg(&service_start_command)
.spawn()
.ok(); // Ignore errors, tmux will handle them
}
}
KeyCode::Char('S') => {
// Service stop command
// Service stop command via SSH with progress display
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
if self.start_command(&hostname, CommandType::ServiceStop, service_name.clone()) {
return Ok(Some(UiCommand::ServiceStop { hostname, service_name }));
}
// Start transition tracking for visual feedback
self.start_command(&hostname, CommandType::ServiceStop, service_name.clone());
let connection_ip = self.get_connection_ip(&hostname);
let service_stop_command = format!(
"bash -c 'cat << \"EOF\"\nService Stop: {}.service\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"sudo systemctl stop {}.service && echo \\\"Service stopped successfully\\\" && sudo systemctl status {}.service --no-pager -l\"\necho\necho \"========================================\"\necho \"Operation completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'",
service_name,
hostname,
connection_ip,
self.config.ssh.rebuild_user,
connection_ip,
service_name,
service_name
);
std::process::Command::new("tmux")
.arg("split-window")
.arg("-v")
.arg("-p")
.arg("30")
.arg(&service_stop_command)
.spawn()
.ok(); // Ignore errors, tmux will handle them
}
}
KeyCode::Char('J') => {

View File

@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-shared"
version = "0.1.73"
version = "0.1.74"
edition = "2021"
[dependencies]