Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| de252d27b9 | |||
| db0e41a7d3 | |||
| ec460496d8 |
@@ -49,8 +49,12 @@ hostname2 = [
|
||||
### Navigation
|
||||
- **Tab**: Switch between hosts
|
||||
- **↑↓ or j/k**: Select services
|
||||
- **s**: Start selected service (UserStart)
|
||||
- **S**: Stop selected service (UserStop)
|
||||
- **J**: Show service logs (journalctl)
|
||||
- **L**: Show custom log files
|
||||
- **R**: Rebuild current host
|
||||
- **B**: Run backup on current host
|
||||
- **q**: Quit dashboard
|
||||
|
||||
## Core Architecture Principles
|
||||
|
||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.69"
|
||||
version = "0.1.73"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@@ -292,7 +292,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.69"
|
||||
version = "0.1.73"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -315,7 +315,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.69"
|
||||
version = "0.1.73"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"serde",
|
||||
|
||||
@@ -88,7 +88,9 @@ cm-dashboard • ● cmbox ● srv01 ● srv02 ● steambox
|
||||
- **s**: Start selected service (UserStart)
|
||||
- **S**: Stop selected service (UserStop)
|
||||
- **J**: Show service logs (journalctl in tmux popup)
|
||||
- **L**: Show custom log files (tail -f custom paths in tmux popup)
|
||||
- **R**: Rebuild current host
|
||||
- **B**: Run backup on current host
|
||||
- **q**: Quit
|
||||
|
||||
### Status Indicators
|
||||
@@ -173,9 +175,10 @@ subscriber_ports = [6130]
|
||||
[hosts]
|
||||
predefined_hosts = ["cmbox", "srv01", "srv02"]
|
||||
|
||||
[ui]
|
||||
ssh_user = "cm"
|
||||
[ssh]
|
||||
rebuild_user = "cm"
|
||||
rebuild_alias = "nixos-rebuild-cmtec"
|
||||
backup_alias = "cm-backup-run"
|
||||
```
|
||||
|
||||
## Technical Implementation
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.71"
|
||||
version = "0.1.74"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::time::Duration;
|
||||
use tokio::time::interval;
|
||||
use tracing::{debug, error, info};
|
||||
|
||||
use crate::communication::{AgentCommand, ServiceAction, ZmqHandler};
|
||||
use crate::communication::{AgentCommand, ZmqHandler};
|
||||
use crate::config::AgentConfig;
|
||||
use crate::metrics::MetricCollectionManager;
|
||||
use crate::notifications::NotificationManager;
|
||||
@@ -315,79 +315,10 @@ impl Agent {
|
||||
info!("Processing Ping command - agent is alive");
|
||||
// Could send a response back via ZMQ if needed
|
||||
}
|
||||
AgentCommand::ServiceControl { service_name, action } => {
|
||||
info!("Processing ServiceControl command: {} {:?}", service_name, action);
|
||||
if let Err(e) = self.handle_service_control(&service_name, &action).await {
|
||||
error!("Failed to execute service control: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle systemd service control commands
|
||||
async fn handle_service_control(&mut self, service_name: &str, action: &ServiceAction) -> Result<()> {
|
||||
let (action_str, is_user_action) = match action {
|
||||
ServiceAction::Start => ("start", false),
|
||||
ServiceAction::Stop => ("stop", false),
|
||||
ServiceAction::Status => ("status", false),
|
||||
ServiceAction::UserStart => ("start", true),
|
||||
ServiceAction::UserStop => ("stop", true),
|
||||
};
|
||||
|
||||
info!("Executing systemctl {} {} (user action: {})", action_str, service_name, is_user_action);
|
||||
|
||||
// Handle user-stopped service tracking before systemctl execution (stop only)
|
||||
match action {
|
||||
ServiceAction::UserStop => {
|
||||
info!("Marking service '{}' as user-stopped", service_name);
|
||||
if let Err(e) = self.service_tracker.mark_user_stopped(service_name) {
|
||||
error!("Failed to mark service as user-stopped: {}", e);
|
||||
} else {
|
||||
// Sync to global tracker
|
||||
UserStoppedServiceTracker::update_global(&self.service_tracker);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Spawn the systemctl command asynchronously to avoid blocking the agent
|
||||
let service_name_clone = service_name.to_string();
|
||||
let action_str_clone = action_str.to_string();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let result = tokio::process::Command::new("sudo")
|
||||
.arg("systemctl")
|
||||
.arg(&action_str_clone)
|
||||
.arg(format!("{}.service", service_name_clone))
|
||||
.output()
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(output) => {
|
||||
if output.status.success() {
|
||||
info!("Service {} {} completed successfully", service_name_clone, action_str_clone);
|
||||
if !output.stdout.is_empty() {
|
||||
debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
|
||||
}
|
||||
} else {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
error!("Service {} {} failed: {}", service_name_clone, action_str_clone, stderr);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to execute systemctl {} {}: {}", action_str_clone, service_name_clone, e);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
info!("Service {} {} command initiated (non-blocking)", service_name, action_str);
|
||||
|
||||
// Note: Service status will be updated by the normal metric collection cycle
|
||||
// once the systemctl operation completes
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check metrics for user-stopped services that are now active and clear their flags
|
||||
fn clear_user_stopped_flags_for_active_services(&mut self, metrics: &[Metric]) {
|
||||
|
||||
@@ -98,19 +98,4 @@ pub enum AgentCommand {
|
||||
ToggleCollector { name: String, enabled: bool },
|
||||
/// Request status/health check
|
||||
Ping,
|
||||
/// Control systemd service
|
||||
ServiceControl {
|
||||
service_name: String,
|
||||
action: ServiceAction,
|
||||
},
|
||||
}
|
||||
|
||||
/// Service control actions
|
||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||
pub enum ServiceAction {
|
||||
Start,
|
||||
Stop,
|
||||
Status,
|
||||
UserStart, // User-initiated start (clears user-stopped flag)
|
||||
UserStop, // User-initiated stop (marks as user-stopped)
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.71"
|
||||
version = "0.1.74"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -9,14 +9,13 @@ use std::io;
|
||||
use std::time::{Duration, Instant};
|
||||
use tracing::{debug, error, info, warn};
|
||||
|
||||
use crate::communication::{AgentCommand, ServiceAction, ZmqCommandSender, ZmqConsumer};
|
||||
use crate::communication::{ZmqConsumer};
|
||||
use crate::config::DashboardConfig;
|
||||
use crate::metrics::MetricStore;
|
||||
use crate::ui::{TuiApp, UiCommand};
|
||||
|
||||
pub struct Dashboard {
|
||||
zmq_consumer: ZmqConsumer,
|
||||
zmq_command_sender: ZmqCommandSender,
|
||||
metric_store: MetricStore,
|
||||
tui_app: Option<TuiApp>,
|
||||
terminal: Option<Terminal<CrosstermBackend<io::Stdout>>>,
|
||||
@@ -58,14 +57,6 @@ impl Dashboard {
|
||||
}
|
||||
};
|
||||
|
||||
// Initialize ZMQ command sender
|
||||
let zmq_command_sender = match ZmqCommandSender::new(&config.zmq) {
|
||||
Ok(sender) => sender,
|
||||
Err(e) => {
|
||||
error!("Failed to initialize ZMQ command sender: {}", e);
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
|
||||
// Try to connect to hosts but don't fail if none are available
|
||||
match zmq_consumer.connect_to_predefined_hosts(&config.hosts).await {
|
||||
@@ -124,7 +115,6 @@ impl Dashboard {
|
||||
|
||||
Ok(Self {
|
||||
zmq_consumer,
|
||||
zmq_command_sender,
|
||||
metric_store,
|
||||
tui_app,
|
||||
terminal,
|
||||
@@ -134,12 +124,6 @@ impl Dashboard {
|
||||
})
|
||||
}
|
||||
|
||||
/// Send a command to a specific agent
|
||||
pub async fn send_command(&mut self, hostname: &str, command: AgentCommand) -> Result<()> {
|
||||
self.zmq_command_sender
|
||||
.send_command(hostname, command)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn run(&mut self) -> Result<()> {
|
||||
info!("Starting dashboard main loop");
|
||||
@@ -212,34 +196,18 @@ impl Dashboard {
|
||||
metric_message.metrics.len()
|
||||
);
|
||||
|
||||
// Check if this is the first time we've seen this host
|
||||
// Track first contact with host (no command needed - agent sends data every 2s)
|
||||
let is_new_host = !self
|
||||
.initial_commands_sent
|
||||
.contains(&metric_message.hostname);
|
||||
|
||||
if is_new_host {
|
||||
info!(
|
||||
"First contact with host {}, sending initial CollectNow command",
|
||||
"First contact with host {} - data will update automatically",
|
||||
metric_message.hostname
|
||||
);
|
||||
|
||||
// Send CollectNow command for immediate refresh
|
||||
if let Err(e) = self
|
||||
.send_command(&metric_message.hostname, AgentCommand::CollectNow)
|
||||
.await
|
||||
{
|
||||
error!(
|
||||
"Failed to send initial CollectNow command to {}: {}",
|
||||
metric_message.hostname, e
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
"✓ Sent initial CollectNow command to {}",
|
||||
metric_message.hostname
|
||||
);
|
||||
self.initial_commands_sent
|
||||
.insert(metric_message.hostname.clone());
|
||||
}
|
||||
self.initial_commands_sent
|
||||
.insert(metric_message.hostname.clone());
|
||||
}
|
||||
|
||||
// Update metric store
|
||||
@@ -312,22 +280,6 @@ impl Dashboard {
|
||||
/// Execute a UI command by sending it to the appropriate agent
|
||||
async fn execute_ui_command(&self, command: UiCommand) -> Result<()> {
|
||||
match command {
|
||||
UiCommand::ServiceStart { hostname, service_name } => {
|
||||
info!("Sending user start command for service {} on {}", service_name, hostname);
|
||||
let agent_command = AgentCommand::ServiceControl {
|
||||
service_name: service_name.clone(),
|
||||
action: ServiceAction::UserStart,
|
||||
};
|
||||
self.zmq_command_sender.send_command(&hostname, agent_command).await?;
|
||||
}
|
||||
UiCommand::ServiceStop { hostname, service_name } => {
|
||||
info!("Sending user stop command for service {} on {}", service_name, hostname);
|
||||
let agent_command = AgentCommand::ServiceControl {
|
||||
service_name: service_name.clone(),
|
||||
action: ServiceAction::UserStop,
|
||||
};
|
||||
self.zmq_command_sender.send_command(&hostname, agent_command).await?;
|
||||
}
|
||||
UiCommand::TriggerBackup { hostname } => {
|
||||
info!("Trigger backup requested for {}", hostname);
|
||||
// TODO: Implement backup trigger command
|
||||
|
||||
@@ -5,40 +5,6 @@ use zmq::{Context, Socket, SocketType};
|
||||
|
||||
use crate::config::ZmqConfig;
|
||||
|
||||
/// Commands that can be sent to agents
|
||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||
pub enum AgentCommand {
|
||||
/// Request immediate metric collection
|
||||
CollectNow,
|
||||
/// Change collection interval
|
||||
SetInterval { seconds: u64 },
|
||||
/// Enable/disable a collector
|
||||
ToggleCollector { name: String, enabled: bool },
|
||||
/// Request status/health check
|
||||
Ping,
|
||||
/// Control systemd service
|
||||
ServiceControl {
|
||||
service_name: String,
|
||||
action: ServiceAction,
|
||||
},
|
||||
/// Rebuild NixOS system
|
||||
SystemRebuild {
|
||||
git_url: String,
|
||||
git_branch: String,
|
||||
working_dir: String,
|
||||
api_key_file: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Service control actions
|
||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||
pub enum ServiceAction {
|
||||
Start,
|
||||
Stop,
|
||||
Status,
|
||||
UserStart, // User-initiated start (clears user-stopped flag)
|
||||
UserStop, // User-initiated stop (marks as user-stopped)
|
||||
}
|
||||
|
||||
/// ZMQ consumer for receiving metrics from agents
|
||||
pub struct ZmqConsumer {
|
||||
@@ -71,12 +37,6 @@ impl ZmqConsumer {
|
||||
pub async fn connect_to_host(&mut self, hostname: &str, port: u16) -> Result<()> {
|
||||
let address = format!("tcp://{}:{}", hostname, port);
|
||||
|
||||
// First test basic TCP connectivity to the port
|
||||
if let Err(e) = self.test_tcp_connectivity(hostname, port).await {
|
||||
error!("TCP connectivity test failed for {}: {}", address, e);
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
match self.subscriber.connect(&address) {
|
||||
Ok(()) => {
|
||||
info!("Connected to agent at {}", address);
|
||||
@@ -90,25 +50,6 @@ impl ZmqConsumer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Test TCP connectivity to a host and port with timeout
|
||||
async fn test_tcp_connectivity(&self, hostname: &str, port: u16) -> Result<()> {
|
||||
let timeout = std::time::Duration::from_secs(3);
|
||||
|
||||
match tokio::time::timeout(timeout, tokio::net::TcpStream::connect((hostname, port))).await {
|
||||
Ok(Ok(_stream)) => {
|
||||
debug!("TCP connectivity test passed for {}:{}", hostname, port);
|
||||
Ok(())
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
debug!("TCP connectivity test failed for {}:{}: {}", hostname, port, e);
|
||||
Err(anyhow::anyhow!("TCP connection failed: {}", e))
|
||||
}
|
||||
Err(_) => {
|
||||
debug!("TCP connectivity test timed out for {}:{}", hostname, port);
|
||||
Err(anyhow::anyhow!("TCP connection timed out"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Connect to predefined hosts using their configuration
|
||||
pub async fn connect_to_predefined_hosts(&mut self, hosts: &std::collections::HashMap<String, crate::config::HostDetails>) -> Result<()> {
|
||||
@@ -227,42 +168,3 @@ impl ZmqConsumer {
|
||||
}
|
||||
}
|
||||
|
||||
/// ZMQ command sender for sending commands to agents
|
||||
pub struct ZmqCommandSender {
|
||||
context: Context,
|
||||
}
|
||||
|
||||
impl ZmqCommandSender {
|
||||
pub fn new(_config: &ZmqConfig) -> Result<Self> {
|
||||
let context = Context::new();
|
||||
|
||||
info!("ZMQ command sender initialized");
|
||||
|
||||
Ok(Self { context })
|
||||
}
|
||||
|
||||
/// Send a command to a specific agent
|
||||
pub async fn send_command(&self, hostname: &str, command: AgentCommand) -> Result<()> {
|
||||
// Create a new PUSH socket for this command (ZMQ best practice)
|
||||
let socket = self.context.socket(SocketType::PUSH)?;
|
||||
|
||||
// Set socket options
|
||||
socket.set_linger(1000)?; // Wait up to 1 second on close
|
||||
socket.set_sndtimeo(5000)?; // 5 second send timeout
|
||||
|
||||
// Connect to agent's command port (6131)
|
||||
let address = format!("tcp://{}:6131", hostname);
|
||||
socket.connect(&address)?;
|
||||
|
||||
// Serialize command
|
||||
let serialized = serde_json::to_vec(&command)?;
|
||||
|
||||
// Send command
|
||||
socket.send(&serialized, 0)?;
|
||||
|
||||
info!("Sent command {:?} to agent at {}", command, hostname);
|
||||
|
||||
// Socket will be automatically closed when dropped
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,11 +51,12 @@ pub struct SystemConfig {
|
||||
pub nixos_config_api_key_file: Option<String>,
|
||||
}
|
||||
|
||||
/// SSH configuration for rebuild operations
|
||||
/// SSH configuration for rebuild and backup operations
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SshConfig {
|
||||
pub rebuild_user: String,
|
||||
pub rebuild_alias: String,
|
||||
pub backup_alias: String,
|
||||
}
|
||||
|
||||
/// Service log file configuration per host
|
||||
|
||||
@@ -23,8 +23,6 @@ use widgets::{BackupWidget, ServicesWidget, SystemWidget, Widget};
|
||||
/// Commands that can be triggered from the UI
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum UiCommand {
|
||||
ServiceStart { hostname: String, service_name: String },
|
||||
ServiceStop { hostname: String, service_name: String },
|
||||
TriggerBackup { hostname: String },
|
||||
}
|
||||
|
||||
@@ -272,20 +270,84 @@ impl TuiApp {
|
||||
.ok(); // Ignore errors, tmux will handle them
|
||||
}
|
||||
}
|
||||
KeyCode::Char('B') => {
|
||||
// Backup command - works on any panel for current host
|
||||
if let Some(hostname) = self.current_host.clone() {
|
||||
let connection_ip = self.get_connection_ip(&hostname);
|
||||
// Create command that shows logo, runs backup, and waits for user input
|
||||
let logo_and_backup = format!(
|
||||
"bash -c 'cat << \"EOF\"\nBackup Operation\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"bash -ic {}\"\necho\necho \"========================================\"\necho \"Backup completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'",
|
||||
hostname,
|
||||
connection_ip,
|
||||
self.config.ssh.rebuild_user,
|
||||
connection_ip,
|
||||
self.config.ssh.backup_alias
|
||||
);
|
||||
|
||||
std::process::Command::new("tmux")
|
||||
.arg("split-window")
|
||||
.arg("-v")
|
||||
.arg("-p")
|
||||
.arg("30")
|
||||
.arg(&logo_and_backup)
|
||||
.spawn()
|
||||
.ok(); // Ignore errors, tmux will handle them
|
||||
}
|
||||
}
|
||||
KeyCode::Char('s') => {
|
||||
// Service start command
|
||||
// Service start command via SSH with progress display
|
||||
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
|
||||
if self.start_command(&hostname, CommandType::ServiceStart, service_name.clone()) {
|
||||
return Ok(Some(UiCommand::ServiceStart { hostname, service_name }));
|
||||
}
|
||||
// Start transition tracking for visual feedback
|
||||
self.start_command(&hostname, CommandType::ServiceStart, service_name.clone());
|
||||
|
||||
let connection_ip = self.get_connection_ip(&hostname);
|
||||
let service_start_command = format!(
|
||||
"bash -c 'cat << \"EOF\"\nService Start: {}.service\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"sudo systemctl start {}.service && echo \\\"Service started successfully\\\" && sudo systemctl status {}.service --no-pager -l\"\necho\necho \"========================================\"\necho \"Operation completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'",
|
||||
service_name,
|
||||
hostname,
|
||||
connection_ip,
|
||||
self.config.ssh.rebuild_user,
|
||||
connection_ip,
|
||||
service_name,
|
||||
service_name
|
||||
);
|
||||
|
||||
std::process::Command::new("tmux")
|
||||
.arg("split-window")
|
||||
.arg("-v")
|
||||
.arg("-p")
|
||||
.arg("30")
|
||||
.arg(&service_start_command)
|
||||
.spawn()
|
||||
.ok(); // Ignore errors, tmux will handle them
|
||||
}
|
||||
}
|
||||
KeyCode::Char('S') => {
|
||||
// Service stop command
|
||||
// Service stop command via SSH with progress display
|
||||
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
|
||||
if self.start_command(&hostname, CommandType::ServiceStop, service_name.clone()) {
|
||||
return Ok(Some(UiCommand::ServiceStop { hostname, service_name }));
|
||||
}
|
||||
// Start transition tracking for visual feedback
|
||||
self.start_command(&hostname, CommandType::ServiceStop, service_name.clone());
|
||||
|
||||
let connection_ip = self.get_connection_ip(&hostname);
|
||||
let service_stop_command = format!(
|
||||
"bash -c 'cat << \"EOF\"\nService Stop: {}.service\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"sudo systemctl stop {}.service && echo \\\"Service stopped successfully\\\" && sudo systemctl status {}.service --no-pager -l\"\necho\necho \"========================================\"\necho \"Operation completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'",
|
||||
service_name,
|
||||
hostname,
|
||||
connection_ip,
|
||||
self.config.ssh.rebuild_user,
|
||||
connection_ip,
|
||||
service_name,
|
||||
service_name
|
||||
);
|
||||
|
||||
std::process::Command::new("tmux")
|
||||
.arg("split-window")
|
||||
.arg("-v")
|
||||
.arg("-p")
|
||||
.arg("30")
|
||||
.arg(&service_stop_command)
|
||||
.spawn()
|
||||
.ok(); // Ignore errors, tmux will handle them
|
||||
}
|
||||
}
|
||||
KeyCode::Char('J') => {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.71"
|
||||
version = "0.1.74"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
Reference in New Issue
Block a user