Remove Tailscale and connection type complexity
Some checks failed
Build and Release / build-and-release (push) Has been cancelled
Simplifies host connection configuration by removing the tailscale_ip field, connection_type preferences, and the fallback retry logic. Connections now use only the ip field, falling back to the hostname when no ip is configured. Also eliminates the blocking TCP connectivity tests that interfered with heartbeat processing. This resolves the intermittent host lost/found issues by removing the connection retry timeouts that blocked the ZMQ message processing loop.
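The simplified resolution described above amounts to roughly the sketch below (a minimal illustration only; the HostConfig shape and the connect_address helper are assumptions, not the actual dashboard code):

struct HostConfig {
    hostname: String,
    ip: Option<String>,
}

impl HostConfig {
    // Prefer the explicit ip field, otherwise fall back to the hostname.
    // No connection_type preference, no TCP probe, no retry loop.
    fn connect_address(&self) -> &str {
        self.ip.as_deref().unwrap_or(&self.hostname)
    }
}

fn main() {
    let host = HostConfig { hostname: "node1".into(), ip: Some("10.0.0.5".into()) };
    assert_eq!(host.connect_address(), "10.0.0.5");
}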
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-agent"
-version = "0.1.69"
+version = "0.1.70"
 edition = "2021"
 
 [dependencies]
@@ -351,36 +351,40 @@ impl Agent {
             _ => {}
         }
 
-        let output = tokio::process::Command::new("sudo")
-            .arg("systemctl")
-            .arg(action_str)
-            .arg(format!("{}.service", service_name))
-            .output()
-            .await?;
+        // Spawn the systemctl command asynchronously to avoid blocking the agent
+        let service_name_clone = service_name.to_string();
+        let action_str_clone = action_str.to_string();
+
+        tokio::spawn(async move {
+            let result = tokio::process::Command::new("sudo")
+                .arg("systemctl")
+                .arg(&action_str_clone)
+                .arg(format!("{}.service", service_name_clone))
+                .output()
+                .await;
 
-        if output.status.success() {
-            info!("Service {} {} completed successfully", service_name, action_str);
-            if !output.stdout.is_empty() {
-                debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
-            }
-
-            // Note: User-stopped flag will be cleared by systemd collector
-            // when service actually reaches 'active' state, not here
-        } else {
-            let stderr = String::from_utf8_lossy(&output.stderr);
-            error!("Service {} {} failed: {}", service_name, action_str, stderr);
-            return Err(anyhow::anyhow!("systemctl {} {} failed: {}", action_str, service_name, stderr));
-        }
+            match result {
+                Ok(output) => {
+                    if output.status.success() {
+                        info!("Service {} {} completed successfully", service_name_clone, action_str_clone);
+                        if !output.stdout.is_empty() {
+                            debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
+                        }
+                    } else {
+                        let stderr = String::from_utf8_lossy(&output.stderr);
+                        error!("Service {} {} failed: {}", service_name_clone, action_str_clone, stderr);
+                    }
+                }
+                Err(e) => {
+                    error!("Failed to execute systemctl {} {}: {}", action_str_clone, service_name_clone, e);
+                }
+            }
+        });
 
-        // Force refresh metrics after service control to update service status
-        if matches!(action, ServiceAction::Start | ServiceAction::Stop | ServiceAction::UserStart | ServiceAction::UserStop) {
-            info!("Triggering immediate metric refresh after service control");
-            if let Err(e) = self.collect_metrics_only().await {
-                error!("Failed to refresh metrics after service control: {}", e);
-            } else {
-                info!("Service status refreshed immediately after {} {}", action_str, service_name);
-            }
-        }
+        info!("Service {} {} command initiated (non-blocking)", service_name, action_str);
+
+        // Note: Service status will be updated by the normal metric collection cycle
+        // once the systemctl operation completes
 
         Ok(())
     }