Implement systemd service approach for nixos-rebuild operations
Some checks failed
Build and Release / build-and-release (push) Failing after 1m58s
Some checks failed
Build and Release / build-and-release (push) Failing after 1m58s
- Add cm-rebuild systemd service for process isolation
- Add sudo permissions for service control and journal access
- Remove verbose flag for cleaner output
- Ensure reliable rebuild operations without agent crashes
This commit is contained in:
parent
a928d73134
commit
69892a2d84
50
CLAUDE.md
50
CLAUDE.md
@ -92,6 +92,56 @@ Storage:
|
|||||||
- ✅ **Git Clone Approach**: Implemented for nixos-rebuild to avoid directory permissions
|
- ✅ **Git Clone Approach**: Implemented for nixos-rebuild to avoid directory permissions
|
||||||
- ✅ **Visual Feedback**: Directional arrows for service status (↑ starting, ↓ stopping, ↻ restarting)
|
- ✅ **Visual Feedback**: Directional arrows for service status (↑ starting, ↓ stopping, ↻ restarting)
|
||||||
|
|
||||||
|
### Terminal Popup for Real-time Output - IMPLEMENTED ✅
|
||||||
|
|
||||||
|
**Status (as of 2025-10-26):**
|
||||||
|
- ✅ **Terminal Popup UI**: 80% screen coverage with terminal styling and color-coded output
|
||||||
|
- ✅ **ZMQ Streaming Protocol**: CommandOutputMessage for real-time output transmission
|
||||||
|
- ✅ **Keyboard Controls**: ESC/Q to close, ↑↓ to scroll, manual close (no auto-close)
|
||||||
|
- ✅ **Real-time Display**: Live streaming of command output as it happens
|
||||||
|
- ✅ **Version-based Agent Reporting**: Shows "Agent: v0.1.13" instead of nix store hash
|
||||||
|
|
||||||
|
**Current Implementation Issues:**
|
||||||
|
- ❌ **Agent Process Crashes**: Agent dies during nixos-rebuild execution
|
||||||
|
- ❌ **Inconsistent Output**: Different outputs each time 'R' is pressed
|
||||||
|
- ❌ **Limited Output Visibility**: Not capturing all nixos-rebuild progress
|
||||||
|
|
||||||
|
**PLANNED SOLUTION - Systemd Service Approach:**
|
||||||
|
|
||||||
|
**Problem**: Direct nixos-rebuild execution inside the agent causes process crashes and inconsistent output.
|
||||||
|
|
||||||
|
**Solution**: Create dedicated systemd service for rebuild operations.
|
||||||
|
|
||||||
|
**Implementation Plan:**
|
||||||
|
1. **NixOS Systemd Service**:
|
||||||
|
```nix
|
||||||
|
systemd.services.cm-rebuild = {
|
||||||
|
description = "CM Dashboard NixOS Rebuild";
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "oneshot";
|
||||||
|
ExecStart = "${pkgs.nixos-rebuild}/bin/nixos-rebuild switch --flake . --option sandbox false";
|
||||||
|
WorkingDirectory = "/var/lib/cm-dashboard/nixos-config";
|
||||||
|
User = "root";
|
||||||
|
StandardOutput = "journal";
|
||||||
|
StandardError = "journal";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Agent Modification**:
|
||||||
|
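- Replace direct nixos-rebuild execution with: `systemctl start cm-rebuild` (note: for a `Type = "oneshot"` unit, `systemctl start` waits until the rebuild has finished; pass `--no-block` if the output is to be streamed while the rebuild is still running)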
|
||||||
|
- Stream output via: `journalctl -u cm-rebuild -f --no-pager`
|
||||||
|
- Monitor service status for completion detection
|
||||||
|
|
||||||
|
3. **Benefits**:
|
||||||
|
- **Process Isolation**: Service runs independently, won't crash agent
|
||||||
|
- **Consistent Output**: Always the same deterministic rebuild process
|
||||||
|
- **Proper Logging**: systemd journal handles all output management
|
||||||
|
- **Resource Management**: systemd manages cleanup and resource limits
|
||||||
|
- **Status Tracking**: Can query service status (running/failed/success)
|
||||||
|
|
||||||
|
**Next Priority**: Implement systemd service approach for reliable rebuild operations.
|
||||||
|
|
||||||
**Keyboard Controls Status:**
|
**Keyboard Controls Status:**
|
||||||
- **Services Panel**:
|
- **Services Panel**:
|
||||||
- R (restart) ✅ Working
|
- R (restart) ✅ Working
|
||||||
|
|||||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard"
|
name = "cm-dashboard"
|
||||||
version = "0.1.0"
|
version = "0.1.13"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"chrono",
|
"chrono",
|
||||||
@ -291,7 +291,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard-agent"
|
name = "cm-dashboard-agent"
|
||||||
version = "0.1.0"
|
version = "0.1.13"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
@ -314,7 +314,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard-shared"
|
name = "cm-dashboard-shared"
|
||||||
version = "0.1.0"
|
version = "0.1.13"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"chrono",
|
"chrono",
|
||||||
"serde",
|
"serde",
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard-agent"
|
name = "cm-dashboard-agent"
|
||||||
version = "0.1.11"
|
version = "0.1.13"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
@ -385,77 +385,118 @@ impl Agent {
|
|||||||
self.zmq_handler.publish_command_output(&message).await
|
self.zmq_handler.publish_command_output(&message).await
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Execute nixos-rebuild with real-time output streaming
|
/// Execute nixos-rebuild via systemd service with journal streaming
|
||||||
async fn execute_nixos_rebuild_with_streaming(&self, command_id: &str, working_dir: &str) -> Result<()> {
|
async fn execute_nixos_rebuild_with_streaming(&self, command_id: &str, _working_dir: &str) -> Result<()> {
|
||||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||||
use tokio::process::Command;
|
use tokio::process::Command;
|
||||||
|
|
||||||
let mut child = Command::new("sudo")
|
self.send_command_output(command_id, "SystemRebuild", "Starting nixos-rebuild via systemd service...").await?;
|
||||||
.arg("/run/current-system/sw/bin/nixos-rebuild")
|
|
||||||
.arg("switch")
|
// Start the cm-rebuild systemd service
|
||||||
.arg("--option")
|
let start_result = Command::new("sudo")
|
||||||
.arg("sandbox")
|
.arg("systemctl")
|
||||||
.arg("false")
|
.arg("start")
|
||||||
.arg("--flake")
|
.arg("cm-rebuild")
|
||||||
.arg(".")
|
.output()
|
||||||
.current_dir(working_dir)
|
.await?;
|
||||||
|
|
||||||
|
if !start_result.status.success() {
|
||||||
|
let error = String::from_utf8_lossy(&start_result.stderr);
|
||||||
|
return Err(anyhow::anyhow!("Failed to start cm-rebuild service: {}", error));
|
||||||
|
}
|
||||||
|
|
||||||
|
self.send_command_output(command_id, "SystemRebuild", "✓ Service started, streaming output...").await?;
|
||||||
|
|
||||||
|
// Stream journal output in real-time
|
||||||
|
let mut journal_child = Command::new("sudo")
|
||||||
|
.arg("journalctl")
|
||||||
|
.arg("-u")
|
||||||
|
.arg("cm-rebuild")
|
||||||
|
.arg("-f")
|
||||||
|
.arg("--no-pager")
|
||||||
|
.arg("--since")
|
||||||
|
.arg("now")
|
||||||
.stdout(std::process::Stdio::piped())
|
.stdout(std::process::Stdio::piped())
|
||||||
.stderr(std::process::Stdio::piped())
|
.stderr(std::process::Stdio::piped())
|
||||||
.spawn()?;
|
.spawn()?;
|
||||||
|
|
||||||
// Get stdout and stderr handles
|
let stdout = journal_child.stdout.take().expect("Failed to get journalctl stdout");
|
||||||
let stdout = child.stdout.take().expect("Failed to get stdout");
|
let mut reader = BufReader::new(stdout);
|
||||||
let stderr = child.stderr.take().expect("Failed to get stderr");
|
let mut lines = reader.lines();
|
||||||
|
|
||||||
// Create readers for both streams
|
// Stream journal output and monitor service status
|
||||||
let stdout_reader = BufReader::new(stdout);
|
let mut service_completed = false;
|
||||||
let stderr_reader = BufReader::new(stderr);
|
let mut status_check_interval = tokio::time::interval(tokio::time::Duration::from_secs(2));
|
||||||
|
|
||||||
let mut stdout_lines = stdout_reader.lines();
|
|
||||||
let mut stderr_lines = stderr_reader.lines();
|
|
||||||
|
|
||||||
// Stream output lines in real-time
|
|
||||||
loop {
|
loop {
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
// Read from stdout
|
// Read journal output
|
||||||
line = stdout_lines.next_line() => {
|
line = lines.next_line() => {
|
||||||
match line {
|
match line {
|
||||||
Ok(Some(line)) => {
|
Ok(Some(line)) => {
|
||||||
self.send_command_output(command_id, "SystemRebuild", &line).await?;
|
// Clean up journal format (remove timestamp/service prefix if needed)
|
||||||
|
let clean_line = self.clean_journal_line(&line);
|
||||||
|
self.send_command_output(command_id, "SystemRebuild", &clean_line).await?;
|
||||||
}
|
}
|
||||||
Ok(None) => {
|
Ok(None) => {
|
||||||
// stdout closed
|
// journalctl stream ended
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(_) => {
|
||||||
self.send_command_output(command_id, "SystemRebuild", &format!("stdout error: {}", e)).await?;
|
// Error reading journal
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Read from stderr
|
// Periodically check service status
|
||||||
line = stderr_lines.next_line() => {
|
_ = status_check_interval.tick() => {
|
||||||
match line {
|
if let Ok(status_result) = Command::new("sudo")
|
||||||
Ok(Some(line)) => {
|
.arg("systemctl")
|
||||||
self.send_command_output(command_id, "SystemRebuild", &line).await?;
|
.arg("is-active")
|
||||||
|
.arg("cm-rebuild")
|
||||||
|
.output()
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
let status = String::from_utf8_lossy(&status_result.stdout).trim().to_string();
|
||||||
|
if status == "inactive" {
|
||||||
|
service_completed = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
Ok(None) => {
|
|
||||||
// stderr closed
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
self.send_command_output(command_id, "SystemRebuild", &format!("stderr error: {}", e)).await?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Wait for process completion
|
|
||||||
result = child.wait() => {
|
|
||||||
let status = result?;
|
|
||||||
if status.success() {
|
|
||||||
return Ok(());
|
|
||||||
} else {
|
|
||||||
return Err(anyhow::anyhow!("nixos-rebuild exited with status: {}", status));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Kill journalctl process
|
||||||
|
let _ = journal_child.kill().await;
|
||||||
|
|
||||||
|
// Check final service result
|
||||||
|
let result = Command::new("sudo")
|
||||||
|
.arg("systemctl")
|
||||||
|
.arg("is-failed")
|
||||||
|
.arg("cm-rebuild")
|
||||||
|
.output()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let is_failed = String::from_utf8_lossy(&result.stdout).trim();
|
||||||
|
if is_failed == "failed" {
|
||||||
|
return Err(anyhow::anyhow!("cm-rebuild service failed"));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clean journal line to remove systemd metadata
|
||||||
|
fn clean_journal_line(&self, line: &str) -> String {
|
||||||
|
// Remove timestamp and service name prefix from journal entries
|
||||||
|
// Example: "Oct 26 10:30:15 cmbox cm-rebuild[1234]: actual output"
|
||||||
|
// Becomes: "actual output"
|
||||||
|
|
||||||
|
if let Some(colon_pos) = line.rfind(": ") {
|
||||||
|
line[colon_pos + 2..].to_string()
|
||||||
|
} else {
|
||||||
|
line.to_string()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Ensure git repository with output streaming
|
/// Ensure git repository with output streaming
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard"
|
name = "cm-dashboard"
|
||||||
version = "0.1.11"
|
version = "0.1.13"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard-shared"
|
name = "cm-dashboard-shared"
|
||||||
version = "0.1.11"
|
version = "0.1.13"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user