From 69892a2d843be90edba58bc4f2a3bf019b777938 Mon Sep 17 00:00:00 2001 From: Christoffer Martinsson Date: Sun, 26 Oct 2025 23:18:09 +0100 Subject: [PATCH] Implement systemd service approach for nixos-rebuild operations - Add cm-rebuild systemd service for process isolation - Add sudo permissions for service control and journal access - Remove verbose flag for cleaner output - Ensures reliable rebuild operations without agent crashes --- CLAUDE.md | 50 ++++++++++++++++ Cargo.lock | 6 +- agent/Cargo.toml | 2 +- agent/src/agent.rs | 135 ++++++++++++++++++++++++++++--------------- dashboard/Cargo.toml | 2 +- shared/Cargo.toml | 2 +- 6 files changed, 144 insertions(+), 53 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 9b766f8..96b4e9e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -92,6 +92,56 @@ Storage: - ✅ **Git Clone Approach**: Implemented for nixos-rebuild to avoid directory permissions - ✅ **Visual Feedback**: Directional arrows for service status (↑ starting, ↓ stopping, ↻ restarting) +### Terminal Popup for Real-time Output - IMPLEMENTED ✅ + +**Status (as of 2025-10-26):** +- ✅ **Terminal Popup UI**: 80% screen coverage with terminal styling and color-coded output +- ✅ **ZMQ Streaming Protocol**: CommandOutputMessage for real-time output transmission +- ✅ **Keyboard Controls**: ESC/Q to close, ↑↓ to scroll, manual close (no auto-close) +- ✅ **Real-time Display**: Live streaming of command output as it happens +- ✅ **Version-based Agent Reporting**: Shows "Agent: v0.1.13" instead of nix store hash + +**Current Implementation Issues:** +- ❌ **Agent Process Crashes**: Agent dies during nixos-rebuild execution +- ❌ **Inconsistent Output**: Different outputs each time 'R' is pressed +- ❌ **Limited Output Visibility**: Not capturing all nixos-rebuild progress + +**PLANNED SOLUTION - Systemd Service Approach:** + +**Problem**: Direct nixos-rebuild execution in agent causes process crashes and inconsistent output. 
+ +**Solution**: Create a dedicated systemd service for rebuild operations. + +**Implementation Plan:** +1. **NixOS Systemd Service**: + ```nix + systemd.services.cm-rebuild = { + description = "CM Dashboard NixOS Rebuild"; + serviceConfig = { + Type = "oneshot"; + ExecStart = "${pkgs.nixos-rebuild}/bin/nixos-rebuild switch --flake . --option sandbox false"; + WorkingDirectory = "/var/lib/cm-dashboard/nixos-config"; + User = "root"; + StandardOutput = "journal"; + StandardError = "journal"; + }; + }; + ``` + +2. **Agent Modification**: + - Replace direct nixos-rebuild execution with: `systemctl start cm-rebuild` (note: for a `Type = "oneshot"` unit this call blocks until the rebuild finishes unless `--no-block` is passed) + - Stream output via: `journalctl -u cm-rebuild -f --no-pager` + - Monitor service status for completion detection + +3. **Benefits**: + - **Process Isolation**: The service runs independently and won't crash the agent + - **Consistent Output**: Always the same deterministic rebuild process + - **Proper Logging**: The systemd journal handles all output management + - **Resource Management**: systemd manages cleanup and resource limits + - **Status Tracking**: Can query service status (running/failed/success) + +**Next Priority**: Implement the systemd service approach for reliable rebuild operations. 
+ **Keyboard Controls Status:** - **Services Panel**: - R (restart) ✅ Working diff --git a/Cargo.lock b/Cargo.lock index 14e8f25..ce81552 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "cm-dashboard" -version = "0.1.0" +version = "0.1.13" dependencies = [ "anyhow", "chrono", @@ -291,7 +291,7 @@ dependencies = [ [[package]] name = "cm-dashboard-agent" -version = "0.1.0" +version = "0.1.13" dependencies = [ "anyhow", "async-trait", @@ -314,7 +314,7 @@ dependencies = [ [[package]] name = "cm-dashboard-shared" -version = "0.1.0" +version = "0.1.13" dependencies = [ "chrono", "serde", diff --git a/agent/Cargo.toml b/agent/Cargo.toml index 342ef9c..b68112a 100644 --- a/agent/Cargo.toml +++ b/agent/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard-agent" -version = "0.1.11" +version = "0.1.13" edition = "2021" [dependencies] diff --git a/agent/src/agent.rs b/agent/src/agent.rs index afe521a..77de006 100644 --- a/agent/src/agent.rs +++ b/agent/src/agent.rs @@ -385,77 +385,118 @@ impl Agent { self.zmq_handler.publish_command_output(&message).await } - /// Execute nixos-rebuild with real-time output streaming - async fn execute_nixos_rebuild_with_streaming(&self, command_id: &str, working_dir: &str) -> Result<()> { + /// Execute nixos-rebuild via systemd service with journal streaming + async fn execute_nixos_rebuild_with_streaming(&self, command_id: &str, _working_dir: &str) -> Result<()> { use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Command; - let mut child = Command::new("sudo") - .arg("/run/current-system/sw/bin/nixos-rebuild") - .arg("switch") - .arg("--option") - .arg("sandbox") - .arg("false") - .arg("--flake") - .arg(".") - .current_dir(working_dir) + self.send_command_output(command_id, "SystemRebuild", "Starting nixos-rebuild via systemd service...").await?; + + // Start the cm-rebuild systemd service + let start_result = 
Command::new("sudo") + .arg("systemctl") + .arg("start") + .arg("cm-rebuild") + .output() + .await?; + + if !start_result.status.success() { + let error = String::from_utf8_lossy(&start_result.stderr); + return Err(anyhow::anyhow!("Failed to start cm-rebuild service: {}", error)); + } + + self.send_command_output(command_id, "SystemRebuild", "✓ Service started, streaming output...").await?; + + // Stream journal output in real-time + let mut journal_child = Command::new("sudo") + .arg("journalctl") + .arg("-u") + .arg("cm-rebuild") + .arg("-f") + .arg("--no-pager") + .arg("--since") + .arg("now") .stdout(std::process::Stdio::piped()) .stderr(std::process::Stdio::piped()) .spawn()?; - // Get stdout and stderr handles - let stdout = child.stdout.take().expect("Failed to get stdout"); - let stderr = child.stderr.take().expect("Failed to get stderr"); + let stdout = journal_child.stdout.take().expect("Failed to get journalctl stdout"); + let mut reader = BufReader::new(stdout); + let mut lines = reader.lines(); - // Create readers for both streams - let stdout_reader = BufReader::new(stdout); - let stderr_reader = BufReader::new(stderr); + // Stream journal output and monitor service status + let mut service_completed = false; + let mut status_check_interval = tokio::time::interval(tokio::time::Duration::from_secs(2)); - let mut stdout_lines = stdout_reader.lines(); - let mut stderr_lines = stderr_reader.lines(); - - // Stream output lines in real-time loop { tokio::select! 
{ - // Read from stdout - line = stdout_lines.next_line() => { + // Read journal output + line = lines.next_line() => { match line { Ok(Some(line)) => { - self.send_command_output(command_id, "SystemRebuild", &line).await?; + // Clean up journal format (remove timestamp/service prefix if needed) + let clean_line = self.clean_journal_line(&line); + self.send_command_output(command_id, "SystemRebuild", &clean_line).await?; } Ok(None) => { - // stdout closed + // journalctl stream ended + break; } - Err(e) => { - self.send_command_output(command_id, "SystemRebuild", &format!("stdout error: {}", e)).await?; + Err(_) => { + // Error reading journal + break; } } } - // Read from stderr - line = stderr_lines.next_line() => { - match line { - Ok(Some(line)) => { - self.send_command_output(command_id, "SystemRebuild", &line).await?; + // Periodically check service status + _ = status_check_interval.tick() => { + if let Ok(status_result) = Command::new("sudo") + .arg("systemctl") + .arg("is-active") + .arg("cm-rebuild") + .output() + .await + { + let status = String::from_utf8_lossy(&status_result.stdout).trim().to_string(); + if status == "inactive" { + service_completed = true; + break; } - Ok(None) => { - // stderr closed - } - Err(e) => { - self.send_command_output(command_id, "SystemRebuild", &format!("stderr error: {}", e)).await?; - } - } - } - // Wait for process completion - result = child.wait() => { - let status = result?; - if status.success() { - return Ok(()); - } else { - return Err(anyhow::anyhow!("nixos-rebuild exited with status: {}", status)); } } } } + + // Kill journalctl process + let _ = journal_child.kill().await; + + // Check final service result + let result = Command::new("sudo") + .arg("systemctl") + .arg("is-failed") + .arg("cm-rebuild") + .output() + .await?; + + let is_failed = String::from_utf8_lossy(&result.stdout).trim(); + if is_failed == "failed" { + return Err(anyhow::anyhow!("cm-rebuild service failed")); + } + + Ok(()) + } + + /// Clean 
journal line to remove systemd metadata + fn clean_journal_line(&self, line: &str) -> String { + // Remove timestamp and service name prefix from journal entries + // Example: "Oct 26 10:30:15 cmbox cm-rebuild[1234]: actual output" + // Becomes: "actual output" + + if let Some(colon_pos) = line.rfind(": ") { + line[colon_pos + 2..].to_string() + } else { + line.to_string() + } } /// Ensure git repository with output streaming diff --git a/dashboard/Cargo.toml b/dashboard/Cargo.toml index 0821989..bfb455c 100644 --- a/dashboard/Cargo.toml +++ b/dashboard/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard" -version = "0.1.11" +version = "0.1.13" edition = "2021" [dependencies] diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 8027f56..c5e3cba 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cm-dashboard-shared" -version = "0.1.11" +version = "0.1.13" edition = "2021" [dependencies]