Compare commits

...

4 Commits

Author SHA1 Message Date
33e700529e Bump version to 0.1.71
All checks were successful
Build and Release / build-and-release (push) Successful in 1m30s
Version bump for a release with the fixed automated NixOS configuration
update workflow, which now uses the correct file path.
2025-11-15 10:25:08 +01:00
d644b7d40a Fix NixOS config path in automated release workflow
Update release.yml to use correct path hosts/services/cm-dashboard.nix
instead of hosts/common/cm-dashboard.nix. Also update documentation
in CLAUDE.md and README.md to reflect the correct file location.
2025-11-15 10:21:30 +01:00
f635ba9c75 Remove Tailscale and connection type complexity
Some checks failed
Build and Release / build-and-release (push) Has been cancelled
Simplifies host connection configuration by removing the tailscale_ip field,
connection_type preferences, and the fallback retry logic. Connections now use
only the ip field, with the hostname as fallback. This eliminates the blocking
TCP connectivity tests that interfered with heartbeat processing.

This resolves intermittent host lost/found issues by removing the
connection retry timeouts that blocked the ZMQ message processing loop.
2025-11-15 10:04:47 +01:00
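
The failure mode described in this commit can be seen in a minimal sketch (hypothetical addresses, not taken from this repository): serial connect probes with a 3-second timeout, awaited on the same task that handles heartbeats, stall that task for up to 3 seconds per unreachable host.

```rust
use std::time::Duration;
use tokio::{net::TcpStream, time::timeout};

#[tokio::main]
async fn main() {
    // Hypothetical unreachable hosts; any black-holed addresses behave the same.
    let hosts = ["10.255.255.1:5555", "10.255.255.2:5555"];
    let start = std::time::Instant::now();
    for addr in hosts {
        // Each probe holds this task for up to 3 s before giving up.
        let _ = timeout(Duration::from_secs(3), TcpStream::connect(addr)).await;
    }
    // Roughly 6 s for two dead hosts: time during which no heartbeat
    // arriving on this task could be processed.
    println!("probes took {:?}", start.elapsed());
}
```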
76b6e3373e Change auto connection type to prioritize local IP first
All checks were successful
Build and Release / build-and-release (push) Successful in 2m36s
Update the auto connection type logic to try local network connections
before falling back to Tailscale. This provides better performance by
using faster local connections when available while maintaining Tailscale
as a reliable fallback.

Changes:
- Auto connection priority: local → tailscale → hostname (was tailscale → local)
- Fallback retry order updated to match new priority
- Supports omitting the IP field in config for hosts without a static local IP
2025-11-13 12:52:46 +01:00
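
A minimal sketch of the "auto" priority this commit describes, assuming the ip and tailscale_ip fields shown in the config diff further down (example addresses are hypothetical):

```rust
// Standalone version of the auto connection priority:
// local IP first, then Tailscale, then the bare hostname.
fn auto_connection_ip(ip: Option<&str>, tailscale_ip: Option<&str>, hostname: &str) -> String {
    ip.or(tailscale_ip).unwrap_or(hostname).to_string()
}

fn main() {
    assert_eq!(auto_connection_ip(Some("192.168.1.10"), Some("100.64.0.5"), "server1"), "192.168.1.10");
    assert_eq!(auto_connection_ip(None, Some("100.64.0.5"), "server1"), "100.64.0.5");
    assert_eq!(auto_connection_ip(None, None, "server1"), "server1");
}
```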
10 changed files with 74 additions and 118 deletions

View File

@@ -113,13 +113,13 @@ jobs:
           NIX_HASH="sha256-$(python3 -c "import base64, binascii; print(base64.b64encode(binascii.unhexlify('$NEW_HASH')).decode())")"
           # Update the NixOS configuration
-          sed -i "s|version = \"v[^\"]*\"|version = \"$VERSION\"|" hosts/common/cm-dashboard.nix
-          sed -i "s|sha256 = \"sha256-[^\"]*\"|sha256 = \"$NIX_HASH\"|" hosts/common/cm-dashboard.nix
+          sed -i "s|version = \"v[^\"]*\"|version = \"$VERSION\"|" hosts/services/cm-dashboard.nix
+          sed -i "s|sha256 = \"sha256-[^\"]*\"|sha256 = \"$NIX_HASH\"|" hosts/services/cm-dashboard.nix
           # Commit and push changes
           git config user.name "Gitea Actions"
           git config user.email "actions@gitea.cmtec.se"
-          git add hosts/common/cm-dashboard.nix
+          git add hosts/services/cm-dashboard.nix
           git commit -m "Auto-update cm-dashboard to $VERSION
           - Update version to $VERSION with automated release
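
The python3 one-liner in this hunk converts the hex-encoded SHA-256 in $NEW_HASH into the base64 form Nix expects in `sha256 = "sha256-..."`. A rough Rust equivalent, assuming the hex and base64 crates (which are not dependencies of this project):

```rust
use base64::Engine as _; // base64 = "0.21", hex = "0.4"

/// Convert a hex-encoded SHA-256 digest into an SRI-style "sha256-<base64>" string.
fn to_sri(hex_hash: &str) -> Result<String, hex::FromHexError> {
    let bytes = hex::decode(hex_hash)?; // binascii.unhexlify
    Ok(format!(
        "sha256-{}",
        base64::engine::general_purpose::STANDARD.encode(bytes) // base64.b64encode
    ))
}
```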

View File

@@ -115,7 +115,7 @@ This automatically:
 - Uploads binaries via Gitea API
 ### NixOS Configuration Updates
-Edit `~/projects/nixosbox/hosts/common/cm-dashboard.nix`:
+Edit `~/projects/nixosbox/hosts/services/cm-dashboard.nix`:
 ```nix
 version = "v0.1.X";

Cargo.lock (generated, 6 lines changed)
View File

@@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
 [[package]]
 name = "cm-dashboard"
-version = "0.1.67"
+version = "0.1.69"
 dependencies = [
  "anyhow",
  "chrono",
@@ -292,7 +292,7 @@ dependencies = [
 [[package]]
 name = "cm-dashboard-agent"
-version = "0.1.67"
+version = "0.1.69"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -315,7 +315,7 @@ dependencies = [
 [[package]]
 name = "cm-dashboard-shared"
-version = "0.1.67"
+version = "0.1.69"
 dependencies = [
  "chrono",
  "serde",

View File

@@ -329,7 +329,7 @@ This triggers automated:
 - Tarball upload to Gitea
 ### NixOS Integration
-Update `~/projects/nixosbox/hosts/common/cm-dashboard.nix`:
+Update `~/projects/nixosbox/hosts/services/cm-dashboard.nix`:
 ```nix
 version = "v0.1.43";

View File

@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-agent"
-version = "0.1.68"
+version = "0.1.71"
 edition = "2021"
 [dependencies]

View File

@@ -351,36 +351,40 @@ impl Agent {
             _ => {}
         }
-        let output = tokio::process::Command::new("sudo")
-            .arg("systemctl")
-            .arg(action_str)
-            .arg(format!("{}.service", service_name))
-            .output()
-            .await?;
-        if output.status.success() {
-            info!("Service {} {} completed successfully", service_name, action_str);
-            if !output.stdout.is_empty() {
-                debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
-            }
-            // Note: User-stopped flag will be cleared by systemd collector
-            // when service actually reaches 'active' state, not here
-        } else {
-            let stderr = String::from_utf8_lossy(&output.stderr);
-            error!("Service {} {} failed: {}", service_name, action_str, stderr);
-            return Err(anyhow::anyhow!("systemctl {} {} failed: {}", action_str, service_name, stderr));
-        }
-        // Force refresh metrics after service control to update service status
-        if matches!(action, ServiceAction::Start | ServiceAction::Stop | ServiceAction::UserStart | ServiceAction::UserStop) {
-            info!("Triggering immediate metric refresh after service control");
-            if let Err(e) = self.collect_metrics_only().await {
-                error!("Failed to refresh metrics after service control: {}", e);
-            } else {
-                info!("Service status refreshed immediately after {} {}", action_str, service_name);
-            }
-        }
+        // Spawn the systemctl command asynchronously to avoid blocking the agent
+        let service_name_clone = service_name.to_string();
+        let action_str_clone = action_str.to_string();
+        tokio::spawn(async move {
+            let result = tokio::process::Command::new("sudo")
+                .arg("systemctl")
+                .arg(&action_str_clone)
+                .arg(format!("{}.service", service_name_clone))
+                .output()
+                .await;
+            match result {
+                Ok(output) => {
+                    if output.status.success() {
+                        info!("Service {} {} completed successfully", service_name_clone, action_str_clone);
+                        if !output.stdout.is_empty() {
+                            debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
+                        }
+                    } else {
+                        let stderr = String::from_utf8_lossy(&output.stderr);
+                        error!("Service {} {} failed: {}", service_name_clone, action_str_clone, stderr);
+                    }
+                }
+                Err(e) => {
+                    error!("Failed to execute systemctl {} {}: {}", action_str_clone, service_name_clone, e);
+                }
+            }
+        });
+        info!("Service {} {} command initiated (non-blocking)", service_name, action_str);
+        // Note: Service status will be updated by the normal metric collection cycle
+        // once the systemctl operation completes
         Ok(())
     }
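
The pattern introduced here — fire-and-forget via tokio::spawn so the caller returns immediately — in isolation, as a runnable sketch (using `sleep` in place of `systemctl` so it runs anywhere):

```rust
use tokio::process::Command;

#[tokio::main] // requires tokio with "process", "macros", "rt" and "time" features
async fn main() {
    // The spawned task owns its data and logs its own outcome; the caller never blocks on it.
    tokio::spawn(async {
        match Command::new("sleep").arg("2").output().await {
            Ok(out) if out.status.success() => println!("command completed successfully"),
            Ok(out) => eprintln!("command failed: {}", String::from_utf8_lossy(&out.stderr)),
            Err(e) => eprintln!("failed to execute command: {e}"),
        }
    });
    println!("command initiated (non-blocking)"); // prints immediately, before `sleep` exits
    tokio::time::sleep(std::time::Duration::from_secs(3)).await; // keep the runtime alive for the demo
}
```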

View File

@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard"
-version = "0.1.68"
+version = "0.1.71"
 edition = "2021"
 [dependencies]

View File

@@ -71,6 +71,12 @@ impl ZmqConsumer {
     pub async fn connect_to_host(&mut self, hostname: &str, port: u16) -> Result<()> {
         let address = format!("tcp://{}:{}", hostname, port);
+        // First test basic TCP connectivity to the port
+        if let Err(e) = self.test_tcp_connectivity(hostname, port).await {
+            error!("TCP connectivity test failed for {}: {}", address, e);
+            return Err(e);
+        }
         match self.subscriber.connect(&address) {
             Ok(()) => {
                 info!("Connected to agent at {}", address);
@@ -84,6 +90,26 @@ impl ZmqConsumer {
}
}
+    /// Test TCP connectivity to a host and port with timeout
+    async fn test_tcp_connectivity(&self, hostname: &str, port: u16) -> Result<()> {
+        let timeout = std::time::Duration::from_secs(3);
+        match tokio::time::timeout(timeout, tokio::net::TcpStream::connect((hostname, port))).await {
+            Ok(Ok(_stream)) => {
+                debug!("TCP connectivity test passed for {}:{}", hostname, port);
+                Ok(())
+            }
+            Ok(Err(e)) => {
+                debug!("TCP connectivity test failed for {}:{}: {}", hostname, port, e);
+                Err(anyhow::anyhow!("TCP connection failed: {}", e))
+            }
+            Err(_) => {
+                debug!("TCP connectivity test timed out for {}:{}", hostname, port);
+                Err(anyhow::anyhow!("TCP connection timed out"))
+            }
+        }
+    }
     /// Connect to predefined hosts using their configuration
     pub async fn connect_to_predefined_hosts(&mut self, hosts: &std::collections::HashMap<String, crate::config::HostDetails>) -> Result<()> {
         let default_port = self.config.subscriber_ports[0];
@@ -104,27 +130,13 @@ impl ZmqConsumer {
         Ok(())
     }
-    /// Connect to a host using its configuration details with fallback support
+    /// Connect to a host using its configuration details
     pub async fn connect_to_host_with_details(&mut self, hostname: &str, host_details: &crate::config::HostDetails, port: u16) -> Result<()> {
-        // Get primary connection IP
+        // Get primary connection IP only - no fallbacks
         let primary_ip = host_details.get_connection_ip(hostname);
-        // Try primary connection
-        if let Ok(()) = self.connect_to_host(&primary_ip, port).await {
-            info!("Connected to {} via primary address: {}", hostname, primary_ip);
-            return Ok(());
-        }
-        // Try fallback IPs if primary fails
-        let fallbacks = host_details.get_fallback_ips(hostname);
-        for fallback_ip in fallbacks {
-            if let Ok(()) = self.connect_to_host(&fallback_ip, port).await {
-                info!("Connected to {} via fallback address: {}", hostname, fallback_ip);
-                return Ok(());
-            }
-        }
-        Err(anyhow::anyhow!("Failed to connect to {} using all available addresses", hostname))
+        // Connect directly without fallback attempts
+        self.connect_to_host(&primary_ip, port).await
     }
     /// Receive command output from any connected agent (non-blocking)

View File

@@ -31,75 +31,15 @@ pub struct HostDetails {
     pub mac_address: Option<String>,
     /// Primary IP address (local network)
     pub ip: Option<String>,
-    /// Tailscale network IP address
-    pub tailscale_ip: Option<String>,
-    /// Preferred connection type: "local", "tailscale", or "auto" (fallback)
-    #[serde(default = "default_connection_type")]
-    pub connection_type: String,
 }
-fn default_connection_type() -> String {
-    "auto".to_string()
-}
 impl HostDetails {
-    /// Get the preferred IP address for connection based on connection_type
+    /// Get the IP address for connection (uses ip field or hostname as fallback)
     pub fn get_connection_ip(&self, hostname: &str) -> String {
-        match self.connection_type.as_str() {
-            "tailscale" => {
-                if let Some(ref ts_ip) = self.tailscale_ip {
-                    ts_ip.clone()
-                } else {
-                    // Fallback to local IP or hostname
-                    self.ip.as_ref().unwrap_or(&hostname.to_string()).clone()
-                }
-            }
-            "local" => {
-                if let Some(ref local_ip) = self.ip {
-                    local_ip.clone()
-                } else {
-                    hostname.to_string()
-                }
-            }
-            "auto" | _ => {
-                // Try tailscale first, then local, then hostname
-                if let Some(ref ts_ip) = self.tailscale_ip {
-                    ts_ip.clone()
-                } else if let Some(ref local_ip) = self.ip {
-                    local_ip.clone()
-                } else {
-                    hostname.to_string()
-                }
-            }
-        }
+        self.ip.as_ref().unwrap_or(&hostname.to_string()).clone()
     }
-    /// Get fallback IP addresses for connection retry
-    pub fn get_fallback_ips(&self, hostname: &str) -> Vec<String> {
-        let mut fallbacks = Vec::new();
-        // Add all available IPs except the primary one
-        let primary = self.get_connection_ip(hostname);
-        if let Some(ref ts_ip) = self.tailscale_ip {
-            if ts_ip != &primary {
-                fallbacks.push(ts_ip.clone());
-            }
-        }
-        if let Some(ref local_ip) = self.ip {
-            if local_ip != &primary {
-                fallbacks.push(local_ip.clone());
-            }
-        }
-        // Always include hostname as final fallback if not already primary
-        if hostname != primary {
-            fallbacks.push(hostname.to_string());
-        }
-        fallbacks
-    }
 }
 /// System configuration
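
The simplified resolution can be exercised in isolation; a minimal sketch with a stand-in struct (the real HostDetails may carry more fields than this hunk shows, and the example addresses are hypothetical):

```rust
// Stand-in for HostDetails, reduced to the one field the new logic reads.
struct Host {
    ip: Option<String>,
}

impl Host {
    // Mirrors the simplified get_connection_ip: the ip field wins, the hostname is the fallback.
    fn get_connection_ip(&self, hostname: &str) -> String {
        self.ip.as_ref().unwrap_or(&hostname.to_string()).clone()
    }
}

fn main() {
    let with_ip = Host { ip: Some("192.168.1.10".into()) };
    let without_ip = Host { ip: None };
    assert_eq!(with_ip.get_connection_ip("server1"), "192.168.1.10");
    assert_eq!(without_ip.get_connection_ip("server1"), "server1");
}
```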

View File

@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-shared"
-version = "0.1.68"
+version = "0.1.71"
 edition = "2021"
 [dependencies]