Remove Tailscale and connection type complexity
Some checks failed
Build and Release / build-and-release (push) Has been cancelled
Simplifies host connection configuration by removing the tailscale_ip field, connection_type preferences, and the fallback retry logic. Connections now use only the ip field, falling back to the hostname when no ip is configured. Also eliminates the blocking TCP connectivity tests that interfered with heartbeat processing. This resolves the intermittent host lost/found issues by removing the connection retry timeouts that blocked the ZMQ message processing loop.
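The simplified resolution described above amounts to roughly the sketch below (a minimal illustration only; the HostConfig shape and the connect_address helper are assumptions, not the actual dashboard code):

struct HostConfig {
    hostname: String,
    ip: Option<String>,
}

impl HostConfig {
    // Prefer the explicit ip field, otherwise fall back to the hostname.
    // No connection_type preference, no TCP probe, no retry loop.
    fn connect_address(&self) -> &str {
        self.ip.as_deref().unwrap_or(&self.hostname)
    }
}

fn main() {
    let host = HostConfig { hostname: "node1".into(), ip: Some("10.0.0.5".into()) };
    assert_eq!(host.connect_address(), "10.0.0.5");
}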
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-agent"
-version = "0.1.69"
+version = "0.1.70"
 edition = "2021"
 
 [dependencies]
@@ -351,36 +351,40 @@ impl Agent {
             _ => {}
         }
 
-        let output = tokio::process::Command::new("sudo")
-            .arg("systemctl")
-            .arg(action_str)
-            .arg(format!("{}.service", service_name))
-            .output()
-            .await?;
+        // Spawn the systemctl command asynchronously to avoid blocking the agent
+        let service_name_clone = service_name.to_string();
+        let action_str_clone = action_str.to_string();
+
+        tokio::spawn(async move {
+            let result = tokio::process::Command::new("sudo")
+                .arg("systemctl")
+                .arg(&action_str_clone)
+                .arg(format!("{}.service", service_name_clone))
+                .output()
+                .await;
 
-        if output.status.success() {
-            info!("Service {} {} completed successfully", service_name, action_str);
-            if !output.stdout.is_empty() {
-                debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
-            }
-
-            // Note: User-stopped flag will be cleared by systemd collector
-            // when service actually reaches 'active' state, not here
-        } else {
-            let stderr = String::from_utf8_lossy(&output.stderr);
-            error!("Service {} {} failed: {}", service_name, action_str, stderr);
-            return Err(anyhow::anyhow!("systemctl {} {} failed: {}", action_str, service_name, stderr));
-        }
+            match result {
+                Ok(output) => {
+                    if output.status.success() {
+                        info!("Service {} {} completed successfully", service_name_clone, action_str_clone);
+                        if !output.stdout.is_empty() {
+                            debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
+                        }
+                    } else {
+                        let stderr = String::from_utf8_lossy(&output.stderr);
+                        error!("Service {} {} failed: {}", service_name_clone, action_str_clone, stderr);
+                    }
+                }
+                Err(e) => {
+                    error!("Failed to execute systemctl {} {}: {}", action_str_clone, service_name_clone, e);
+                }
+            }
+        });
 
-        // Force refresh metrics after service control to update service status
-        if matches!(action, ServiceAction::Start | ServiceAction::Stop | ServiceAction::UserStart | ServiceAction::UserStop) {
-            info!("Triggering immediate metric refresh after service control");
-            if let Err(e) = self.collect_metrics_only().await {
-                error!("Failed to refresh metrics after service control: {}", e);
-            } else {
-                info!("Service status refreshed immediately after {} {}", action_str, service_name);
-            }
-        }
+        info!("Service {} {} command initiated (non-blocking)", service_name, action_str);
+
+        // Note: Service status will be updated by the normal metric collection cycle
+        // once the systemctl operation completes
 
         Ok(())
     }