Implement systemd service approach for nixos-rebuild operations
Some checks failed
Build and Release / build-and-release (push) Failing after 1m58s
Some checks failed
Build and Release / build-and-release (push) Failing after 1m58s
- Add cm-rebuild systemd service for process isolation
- Add sudo permissions for service control and journal access
- Remove verbose flag for cleaner output
- Ensure reliable rebuild operations without agent crashes
This commit is contained in:
parent
a928d73134
commit
69892a2d84
50
CLAUDE.md
50
CLAUDE.md
@ -92,6 +92,56 @@ Storage:
|
||||
- ✅ **Git Clone Approach**: Implemented for nixos-rebuild to avoid directory permissions
|
||||
- ✅ **Visual Feedback**: Directional arrows for service status (↑ starting, ↓ stopping, ↻ restarting)
|
||||
|
||||
### Terminal Popup for Real-time Output - IMPLEMENTED ✅
|
||||
|
||||
**Status (as of 2025-10-26):**
|
||||
- ✅ **Terminal Popup UI**: 80% screen coverage with terminal styling and color-coded output
|
||||
- ✅ **ZMQ Streaming Protocol**: CommandOutputMessage for real-time output transmission
|
||||
- ✅ **Keyboard Controls**: ESC/Q to close, ↑↓ to scroll, manual close (no auto-close)
|
||||
- ✅ **Real-time Display**: Live streaming of command output as it happens
|
||||
- ✅ **Version-based Agent Reporting**: Shows "Agent: v0.1.13" instead of nix store hash
|
||||
|
||||
**Current Implementation Issues:**
|
||||
- ❌ **Agent Process Crashes**: Agent dies during nixos-rebuild execution
|
||||
- ❌ **Inconsistent Output**: Different outputs each time 'R' is pressed
|
||||
- ❌ **Limited Output Visibility**: Not capturing all nixos-rebuild progress
|
||||
|
||||
**PLANNED SOLUTION - Systemd Service Approach:**
|
||||
|
||||
**Problem**: Direct nixos-rebuild execution in agent causes process crashes and inconsistent output.
|
||||
|
||||
**Solution**: Create dedicated systemd service for rebuild operations.
|
||||
|
||||
**Implementation Plan:**
|
||||
1. **NixOS Systemd Service**:
|
||||
```nix
|
||||
systemd.services.cm-rebuild = {
|
||||
description = "CM Dashboard NixOS Rebuild";
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
ExecStart = "${pkgs.nixos-rebuild}/bin/nixos-rebuild switch --flake . --option sandbox false";
|
||||
WorkingDirectory = "/var/lib/cm-dashboard/nixos-config";
|
||||
User = "root";
|
||||
StandardOutput = "journal";
|
||||
StandardError = "journal";
|
||||
};
|
||||
};
|
||||
```
|
||||
|
||||
2. **Agent Modification**:
|
||||
- Replace direct nixos-rebuild execution with: `systemctl start cm-rebuild`
|
||||
- Stream output via: `journalctl -u cm-rebuild -f --no-pager`
|
||||
- Monitor service status for completion detection
|
||||
|
||||
3. **Benefits**:
|
||||
- **Process Isolation**: Service runs independently, won't crash agent
|
||||
- **Consistent Output**: Always the same deterministic rebuild process
|
||||
- **Proper Logging**: systemd journal handles all output management
|
||||
- **Resource Management**: systemd manages cleanup and resource limits
|
||||
- **Status Tracking**: Can query service status (running/failed/success)
|
||||
|
||||
**Next Priority**: Implement systemd service approach for reliable rebuild operations.
|
||||
|
||||
**Keyboard Controls Status:**
|
||||
- **Services Panel**:
|
||||
- R (restart) ✅ Working
|
||||
|
||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.0"
|
||||
version = "0.1.13"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@ -291,7 +291,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.0"
|
||||
version = "0.1.13"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@ -314,7 +314,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.0"
|
||||
version = "0.1.13"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"serde",
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.11"
|
||||
version = "0.1.13"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@ -385,77 +385,118 @@ impl Agent {
|
||||
self.zmq_handler.publish_command_output(&message).await
|
||||
}
|
||||
|
||||
/// Execute nixos-rebuild with real-time output streaming
|
||||
async fn execute_nixos_rebuild_with_streaming(&self, command_id: &str, working_dir: &str) -> Result<()> {
|
||||
/// Execute nixos-rebuild via systemd service with journal streaming
|
||||
async fn execute_nixos_rebuild_with_streaming(&self, command_id: &str, _working_dir: &str) -> Result<()> {
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::Command;
|
||||
|
||||
let mut child = Command::new("sudo")
|
||||
.arg("/run/current-system/sw/bin/nixos-rebuild")
|
||||
.arg("switch")
|
||||
.arg("--option")
|
||||
.arg("sandbox")
|
||||
.arg("false")
|
||||
.arg("--flake")
|
||||
.arg(".")
|
||||
.current_dir(working_dir)
|
||||
self.send_command_output(command_id, "SystemRebuild", "Starting nixos-rebuild via systemd service...").await?;
|
||||
|
||||
// Start the cm-rebuild systemd service
|
||||
let start_result = Command::new("sudo")
|
||||
.arg("systemctl")
|
||||
.arg("start")
|
||||
.arg("cm-rebuild")
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !start_result.status.success() {
|
||||
let error = String::from_utf8_lossy(&start_result.stderr);
|
||||
return Err(anyhow::anyhow!("Failed to start cm-rebuild service: {}", error));
|
||||
}
|
||||
|
||||
self.send_command_output(command_id, "SystemRebuild", "✓ Service started, streaming output...").await?;
|
||||
|
||||
// Stream journal output in real-time
|
||||
let mut journal_child = Command::new("sudo")
|
||||
.arg("journalctl")
|
||||
.arg("-u")
|
||||
.arg("cm-rebuild")
|
||||
.arg("-f")
|
||||
.arg("--no-pager")
|
||||
.arg("--since")
|
||||
.arg("now")
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()?;
|
||||
|
||||
// Get stdout and stderr handles
|
||||
let stdout = child.stdout.take().expect("Failed to get stdout");
|
||||
let stderr = child.stderr.take().expect("Failed to get stderr");
|
||||
let stdout = journal_child.stdout.take().expect("Failed to get journalctl stdout");
|
||||
let mut reader = BufReader::new(stdout);
|
||||
let mut lines = reader.lines();
|
||||
|
||||
// Create readers for both streams
|
||||
let stdout_reader = BufReader::new(stdout);
|
||||
let stderr_reader = BufReader::new(stderr);
|
||||
// Stream journal output and monitor service status
|
||||
let mut service_completed = false;
|
||||
let mut status_check_interval = tokio::time::interval(tokio::time::Duration::from_secs(2));
|
||||
|
||||
let mut stdout_lines = stdout_reader.lines();
|
||||
let mut stderr_lines = stderr_reader.lines();
|
||||
|
||||
// Stream output lines in real-time
|
||||
loop {
|
||||
tokio::select! {
|
||||
// Read from stdout
|
||||
line = stdout_lines.next_line() => {
|
||||
// Read journal output
|
||||
line = lines.next_line() => {
|
||||
match line {
|
||||
Ok(Some(line)) => {
|
||||
self.send_command_output(command_id, "SystemRebuild", &line).await?;
|
||||
// Clean up journal format (remove timestamp/service prefix if needed)
|
||||
let clean_line = self.clean_journal_line(&line);
|
||||
self.send_command_output(command_id, "SystemRebuild", &clean_line).await?;
|
||||
}
|
||||
Ok(None) => {
|
||||
// stdout closed
|
||||
// journalctl stream ended
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
self.send_command_output(command_id, "SystemRebuild", &format!("stdout error: {}", e)).await?;
|
||||
Err(_) => {
|
||||
// Error reading journal
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Read from stderr
|
||||
line = stderr_lines.next_line() => {
|
||||
match line {
|
||||
Ok(Some(line)) => {
|
||||
self.send_command_output(command_id, "SystemRebuild", &line).await?;
|
||||
// Periodically check service status
|
||||
_ = status_check_interval.tick() => {
|
||||
if let Ok(status_result) = Command::new("sudo")
|
||||
.arg("systemctl")
|
||||
.arg("is-active")
|
||||
.arg("cm-rebuild")
|
||||
.output()
|
||||
.await
|
||||
{
|
||||
let status = String::from_utf8_lossy(&status_result.stdout).trim().to_string();
|
||||
if status == "inactive" {
|
||||
service_completed = true;
|
||||
break;
|
||||
}
|
||||
Ok(None) => {
|
||||
// stderr closed
|
||||
}
|
||||
Err(e) => {
|
||||
self.send_command_output(command_id, "SystemRebuild", &format!("stderr error: {}", e)).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Wait for process completion
|
||||
result = child.wait() => {
|
||||
let status = result?;
|
||||
if status.success() {
|
||||
return Ok(());
|
||||
} else {
|
||||
return Err(anyhow::anyhow!("nixos-rebuild exited with status: {}", status));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Kill journalctl process
|
||||
let _ = journal_child.kill().await;
|
||||
|
||||
// Check final service result
|
||||
let result = Command::new("sudo")
|
||||
.arg("systemctl")
|
||||
.arg("is-failed")
|
||||
.arg("cm-rebuild")
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
let is_failed = String::from_utf8_lossy(&result.stdout).trim();
|
||||
if is_failed == "failed" {
|
||||
return Err(anyhow::anyhow!("cm-rebuild service failed"));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Clean journal line to remove systemd metadata
|
||||
fn clean_journal_line(&self, line: &str) -> String {
|
||||
// Remove timestamp and service name prefix from journal entries
|
||||
// Example: "Oct 26 10:30:15 cmbox cm-rebuild[1234]: actual output"
|
||||
// Becomes: "actual output"
|
||||
|
||||
if let Some(colon_pos) = line.rfind(": ") {
|
||||
line[colon_pos + 2..].to_string()
|
||||
} else {
|
||||
line.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Ensure git repository with output streaming
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.11"
|
||||
version = "0.1.13"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.11"
|
||||
version = "0.1.13"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user