Complete systemd collector restoration matching original architecture
All checks were successful
Build and Release / build-and-release (push) Successful in 2m8s
All checks were successful
Build and Release / build-and-release (push) Successful in 2m8s
- Add nginx site metrics caching with configurable intervals matching original - Implement complex nginx config parsing with brace counting and redirect detection - Replace curl with reqwest HTTP client for proper timeout and redirect handling - Fix docker container parsing to use comma format with proper status mapping - Add sudo to directory size command for permission handling - Change nginx URLs to use https protocol matching original - Add advanced NixOS ExecStart parsing for argv[] format support - Add nginx -T fallback functionality for config discovery - Implement proper server block parsing with domain validation and brace tracking - Add get_service_memory function matching original signature All functionality now matches pre-refactor implementation architecture.
This commit is contained in:
parent
c006625a3f
commit
eb892096d9
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -279,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.142"
|
||||
version = "0.1.144"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@ -301,7 +301,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.142"
|
||||
version = "0.1.144"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@ -324,7 +324,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.142"
|
||||
version = "0.1.144"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"serde",
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-agent"
|
||||
version = "0.1.144"
|
||||
version = "0.1.145"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@ -32,6 +32,12 @@ struct ServiceCacheState {
|
||||
last_discovery_time: Option<Instant>,
|
||||
/// How often to rediscover services (from config)
|
||||
discovery_interval_seconds: u64,
|
||||
/// Cached nginx site latency metrics
|
||||
nginx_site_metrics: Vec<(String, f32)>,
|
||||
/// Last time nginx sites were checked
|
||||
last_nginx_check_time: Option<Instant>,
|
||||
/// How often to check nginx site latency (configurable)
|
||||
nginx_check_interval_seconds: u64,
|
||||
}
|
||||
|
||||
/// Cached service status information from systemctl list-units
|
||||
@ -60,6 +66,9 @@ impl SystemdCollector {
|
||||
service_status_cache: std::collections::HashMap::new(),
|
||||
last_discovery_time: None,
|
||||
discovery_interval_seconds: config.interval_seconds,
|
||||
nginx_site_metrics: Vec::new(),
|
||||
last_nginx_check_time: None,
|
||||
nginx_check_interval_seconds: config.nginx_check_interval_seconds,
|
||||
};
|
||||
|
||||
Self {
|
||||
@ -100,7 +109,7 @@ impl SystemdCollector {
|
||||
|
||||
// Sub-service metrics for specific services
|
||||
if service_name.contains("nginx") && active_status == "active" {
|
||||
let nginx_sites = self.get_nginx_sites();
|
||||
let nginx_sites = self.get_nginx_site_metrics();
|
||||
for (site_name, latency_ms) in nginx_sites {
|
||||
let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms {
|
||||
"active"
|
||||
@ -198,6 +207,35 @@ impl SystemdCollector {
|
||||
Ok(state.monitored_services.clone())
|
||||
}
|
||||
|
||||
/// Get nginx site metrics, checking them if cache is expired
|
||||
fn get_nginx_site_metrics(&self) -> Vec<(String, f32)> {
|
||||
let mut state = self.state.write().unwrap();
|
||||
|
||||
// Check if we need to refresh nginx site metrics
|
||||
let needs_refresh = match state.last_nginx_check_time {
|
||||
None => true, // First time
|
||||
Some(last_time) => {
|
||||
let elapsed = last_time.elapsed().as_secs();
|
||||
elapsed >= state.nginx_check_interval_seconds
|
||||
}
|
||||
};
|
||||
|
||||
if needs_refresh {
|
||||
// Only check nginx sites if nginx service is active
|
||||
if state.monitored_services.iter().any(|s| s.contains("nginx")) {
|
||||
debug!(
|
||||
"Refreshing nginx site latency metrics (interval: {}s)",
|
||||
state.nginx_check_interval_seconds
|
||||
);
|
||||
let fresh_metrics = self.get_nginx_sites_internal();
|
||||
state.nginx_site_metrics = fresh_metrics;
|
||||
state.last_nginx_check_time = Some(Instant::now());
|
||||
}
|
||||
}
|
||||
|
||||
state.nginx_site_metrics.clone()
|
||||
}
|
||||
|
||||
/// Auto-discover interesting services to monitor
|
||||
fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
|
||||
// First: Get all service unit files
|
||||
@ -400,8 +438,8 @@ impl SystemdCollector {
|
||||
|
||||
/// Get size of a directory in GB
|
||||
fn get_directory_size(&self, path: &str) -> Option<f32> {
|
||||
let output = Command::new("du")
|
||||
.args(&["-sb", path])
|
||||
let output = Command::new("sudo")
|
||||
.args(&["du", "-sb", path])
|
||||
.output()
|
||||
.ok()?;
|
||||
|
||||
@ -431,6 +469,25 @@ impl SystemdCollector {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get service memory usage (if available)
|
||||
fn get_service_memory(&self, service: &str) -> Option<f32> {
|
||||
let output = Command::new("systemctl")
|
||||
.args(&["show", &format!("{}.service", service), "--property=MemoryCurrent"])
|
||||
.output()
|
||||
.ok()?;
|
||||
|
||||
let output_str = String::from_utf8(output.stdout).ok()?;
|
||||
for line in output_str.lines() {
|
||||
if line.starts_with("MemoryCurrent=") {
|
||||
let memory_str = line.strip_prefix("MemoryCurrent=")?;
|
||||
if let Ok(memory_bytes) = memory_str.parse::<u64>() {
|
||||
return Some(memory_bytes as f32 / (1024.0 * 1024.0)); // Convert to MB
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Calculate service status, taking user-stopped services into account
|
||||
fn calculate_service_status(&self, service_name: &str, active_status: &str) -> Status {
|
||||
match active_status.to_lowercase().as_str() {
|
||||
@ -498,8 +555,8 @@ impl SystemdCollector {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get nginx sites with latency checks
|
||||
fn get_nginx_sites(&self) -> Vec<(String, f32)> {
|
||||
/// Get nginx sites with latency checks (internal - no caching)
|
||||
fn get_nginx_sites_internal(&self) -> Vec<(String, f32)> {
|
||||
let mut sites = Vec::new();
|
||||
|
||||
// Discover nginx sites from configuration
|
||||
@ -522,24 +579,38 @@ impl SystemdCollector {
|
||||
|
||||
/// Discover nginx sites from configuration
|
||||
fn discover_nginx_sites(&self) -> Vec<(String, String)> {
|
||||
let mut sites = Vec::new();
|
||||
// Use the same approach as the old working agent: get nginx config from systemd
|
||||
let config_content = match self.get_nginx_config_from_systemd() {
|
||||
Some(content) => content,
|
||||
None => {
|
||||
debug!("Could not get nginx config from systemd, trying nginx -T fallback");
|
||||
match self.get_nginx_config_via_command() {
|
||||
Some(content) => content,
|
||||
None => {
|
||||
debug!("Could not get nginx config via any method");
|
||||
return Vec::new();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Try to get nginx config from systemd service definition (NixOS compatible)
|
||||
if let Some(config_content) = self.get_nginx_config_from_systemd() {
|
||||
sites.extend(self.parse_nginx_config_for_sites(&config_content));
|
||||
// Parse the config content to extract sites
|
||||
self.parse_nginx_config_for_sites(&config_content)
|
||||
}
|
||||
|
||||
// Fallback: try standard nginx config paths
|
||||
if sites.is_empty() {
|
||||
for config_path in ["/etc/nginx/nginx.conf", "/usr/local/nginx/conf/nginx.conf"] {
|
||||
if let Ok(config_content) = std::fs::read_to_string(config_path) {
|
||||
sites.extend(self.parse_nginx_config_for_sites(&config_content));
|
||||
break;
|
||||
}
|
||||
}
|
||||
/// Fallback: get nginx config via nginx -T command
|
||||
fn get_nginx_config_via_command(&self) -> Option<String> {
|
||||
let output = Command::new("nginx")
|
||||
.args(&["-T"])
|
||||
.output()
|
||||
.ok()?;
|
||||
|
||||
if !output.status.success() {
|
||||
debug!("nginx -T failed");
|
||||
return None;
|
||||
}
|
||||
|
||||
sites
|
||||
Some(String::from_utf8_lossy(&output.stdout).to_string())
|
||||
}
|
||||
|
||||
/// Get nginx config from systemd service definition (NixOS compatible)
|
||||
@ -572,89 +643,138 @@ impl SystemdCollector {
|
||||
}
|
||||
|
||||
/// Extract config path from ExecStart line
|
||||
fn extract_config_path_from_exec_start(&self, exec_start_line: &str) -> Option<String> {
|
||||
// Handle both traditional and NixOS systemd formats
|
||||
let parts: Vec<&str> = exec_start_line.split_whitespace().collect();
|
||||
fn extract_config_path_from_exec_start(&self, exec_start: &str) -> Option<String> {
|
||||
// Remove ExecStart= prefix
|
||||
let exec_part = exec_start.strip_prefix("ExecStart=")?;
|
||||
debug!("Parsing exec part: {}", exec_part);
|
||||
|
||||
for (i, part) in parts.iter().enumerate() {
|
||||
if part == &"-c" && i + 1 < parts.len() {
|
||||
return Some(parts[i + 1].to_string());
|
||||
// Handle NixOS format: ExecStart={ path=...; argv[]=...nginx -c /config; ... }
|
||||
if exec_part.contains("argv[]=") {
|
||||
// Extract the part after argv[]=
|
||||
let argv_start = exec_part.find("argv[]=")?;
|
||||
let argv_part = &exec_part[argv_start + 7..]; // Skip "argv[]="
|
||||
debug!("Found NixOS argv part: {}", argv_part);
|
||||
|
||||
// Look for -c flag followed by config path
|
||||
if let Some(c_pos) = argv_part.find(" -c ") {
|
||||
let after_c = &argv_part[c_pos + 4..];
|
||||
// Find the config path (until next space or semicolon)
|
||||
let config_path = after_c.split([' ', ';']).next()?;
|
||||
return Some(config_path.to_string());
|
||||
}
|
||||
} else {
|
||||
// Handle traditional format: ExecStart=/path/nginx -c /config
|
||||
debug!("Parsing traditional format");
|
||||
if let Some(c_pos) = exec_part.find(" -c ") {
|
||||
let after_c = &exec_part[c_pos + 4..];
|
||||
let config_path = after_c.split_whitespace().next()?;
|
||||
return Some(config_path.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Parse nginx config content to extract sites
|
||||
/// Parse nginx config content to extract server names and build site list
|
||||
fn parse_nginx_config_for_sites(&self, config_content: &str) -> Vec<(String, String)> {
|
||||
let mut sites = Vec::new();
|
||||
let mut current_server_name: Option<String> = None;
|
||||
let mut current_listen_port: Option<u16> = None;
|
||||
let mut in_server_block = false;
|
||||
let lines: Vec<&str> = config_content.lines().collect();
|
||||
let mut i = 0;
|
||||
|
||||
for line in config_content.lines() {
|
||||
let line = line.trim();
|
||||
debug!("Parsing nginx config with {} lines", lines.len());
|
||||
|
||||
if line.starts_with("server {") {
|
||||
in_server_block = true;
|
||||
current_server_name = None;
|
||||
current_listen_port = None;
|
||||
} else if line == "}" && in_server_block {
|
||||
// End of server block - create site entry if we have both name and port
|
||||
if let (Some(name), Some(port)) = (¤t_server_name, ¤t_listen_port) {
|
||||
let url = format!("http://{}:{}", name, port);
|
||||
sites.push((name.clone(), url));
|
||||
}
|
||||
in_server_block = false;
|
||||
} else if in_server_block {
|
||||
if line.starts_with("server_name ") {
|
||||
if let Some(name) = line.split_whitespace().nth(1) {
|
||||
current_server_name = Some(name.trim_end_matches(';').to_string());
|
||||
}
|
||||
} else if line.starts_with("listen ") {
|
||||
if let Some(listen_spec) = line.split_whitespace().nth(1) {
|
||||
let port_str = listen_spec.trim_end_matches(';').split(':').last().unwrap_or(listen_spec);
|
||||
if let Ok(port) = port_str.parse::<u16>() {
|
||||
current_listen_port = Some(port);
|
||||
}
|
||||
}
|
||||
while i < lines.len() {
|
||||
let line = lines[i].trim();
|
||||
if line.starts_with("server") && line.contains("{") {
|
||||
if let Some(server_name) = self.parse_server_block(&lines, &mut i) {
|
||||
let url = format!("https://{}", server_name);
|
||||
sites.push((server_name.clone(), url));
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
debug!("Discovered {} nginx sites total", sites.len());
|
||||
sites
|
||||
}
|
||||
|
||||
/// Check site latency via HTTP request
|
||||
/// Parse a server block to extract the primary server_name
|
||||
fn parse_server_block(&self, lines: &[&str], start_index: &mut usize) -> Option<String> {
|
||||
let mut server_names = Vec::new();
|
||||
let mut has_redirect = false;
|
||||
let mut i = *start_index + 1;
|
||||
let mut brace_count = 1;
|
||||
|
||||
// Parse until we close the server block
|
||||
while i < lines.len() && brace_count > 0 {
|
||||
let trimmed = lines[i].trim();
|
||||
|
||||
// Track braces
|
||||
brace_count += trimmed.matches('{').count();
|
||||
brace_count -= trimmed.matches('}').count();
|
||||
|
||||
// Extract server_name
|
||||
if trimmed.starts_with("server_name") {
|
||||
if let Some(names_part) = trimmed.strip_prefix("server_name") {
|
||||
let names_clean = names_part.trim().trim_end_matches(';');
|
||||
for name in names_clean.split_whitespace() {
|
||||
if name != "_"
|
||||
&& !name.is_empty()
|
||||
&& name.contains('.')
|
||||
&& !name.starts_with('$')
|
||||
{
|
||||
server_names.push(name.to_string());
|
||||
debug!("Found server_name in block: {}", name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for redirects (skip redirect-only servers)
|
||||
if trimmed.contains("return") && (trimmed.contains("301") || trimmed.contains("302")) {
|
||||
has_redirect = true;
|
||||
}
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
||||
*start_index = i - 1;
|
||||
|
||||
if !server_names.is_empty() && !has_redirect {
|
||||
return Some(server_names[0].clone());
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Check site latency using HTTP GET requests
|
||||
fn check_site_latency(&self, url: &str) -> Result<f32, Box<dyn std::error::Error>> {
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
// Use curl for HTTP request with timeout
|
||||
let output = Command::new("curl")
|
||||
.args(&[
|
||||
"-s",
|
||||
"-o", "/dev/null",
|
||||
"-w", "%{http_code}",
|
||||
"--max-time", &self.config.http_timeout_seconds.to_string(),
|
||||
"--connect-timeout", &self.config.http_connect_timeout_seconds.to_string(),
|
||||
url
|
||||
])
|
||||
.output()?;
|
||||
// Create HTTP client with timeouts from configuration
|
||||
let client = reqwest::blocking::Client::builder()
|
||||
.timeout(Duration::from_secs(self.config.http_timeout_seconds))
|
||||
.connect_timeout(Duration::from_secs(self.config.http_connect_timeout_seconds))
|
||||
.redirect(reqwest::redirect::Policy::limited(10))
|
||||
.build()?;
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
let latency_ms = elapsed.as_secs_f32() * 1000.0;
|
||||
// Make GET request and measure latency
|
||||
let response = client.get(url).send()?;
|
||||
let latency = start.elapsed().as_millis() as f32;
|
||||
|
||||
if output.status.success() {
|
||||
let http_code = String::from_utf8_lossy(&output.stdout);
|
||||
if http_code.starts_with("2") || http_code.starts_with("3") {
|
||||
Ok(latency_ms)
|
||||
// Check if response is successful (2xx or 3xx status codes)
|
||||
if response.status().is_success() || response.status().is_redirection() {
|
||||
Ok(latency)
|
||||
} else {
|
||||
Err(format!("HTTP error: {}", http_code).into())
|
||||
}
|
||||
} else {
|
||||
Err("HTTP request failed".into())
|
||||
Err(format!(
|
||||
"HTTP request failed for {} with status: {}",
|
||||
url,
|
||||
response.status()
|
||||
)
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
@ -662,23 +782,40 @@ impl SystemdCollector {
|
||||
fn get_docker_containers(&self) -> Vec<(String, String)> {
|
||||
let mut containers = Vec::new();
|
||||
|
||||
// Check if docker is available
|
||||
let output = Command::new("docker")
|
||||
.args(&["ps", "--format", "{{.Names}}:{{.Status}}"])
|
||||
.args(&["ps", "--format", "{{.Names}},{{.Status}}"])
|
||||
.output();
|
||||
|
||||
if let Ok(output) = output {
|
||||
if output.status.success() {
|
||||
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||
for line in output_str.lines() {
|
||||
if let Some((name, status)) = line.split_once(':') {
|
||||
let container_status = if status.contains("Up") {
|
||||
"active"
|
||||
} else {
|
||||
"inactive"
|
||||
let output = match output {
|
||||
Ok(out) if out.status.success() => out,
|
||||
_ => return containers, // Docker not available or failed
|
||||
};
|
||||
containers.push((format!("docker_{}", name), container_status.to_string()));
|
||||
}
|
||||
|
||||
let output_str = match String::from_utf8(output.stdout) {
|
||||
Ok(s) => s,
|
||||
Err(_) => return containers,
|
||||
};
|
||||
|
||||
for line in output_str.lines() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let parts: Vec<&str> = line.split(',').collect();
|
||||
if parts.len() >= 2 {
|
||||
let container_name = parts[0].trim();
|
||||
let status_str = parts[1].trim();
|
||||
|
||||
let container_status = if status_str.contains("Up") {
|
||||
"active"
|
||||
} else if status_str.contains("Exited") {
|
||||
"warning" // Match original: Exited → Warning, not inactive
|
||||
} else {
|
||||
"failed" // Other states → failed
|
||||
};
|
||||
|
||||
containers.push((format!("docker_{}", container_name), container_status.to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard"
|
||||
version = "0.1.144"
|
||||
version = "0.1.145"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "cm-dashboard-shared"
|
||||
version = "0.1.144"
|
||||
version = "0.1.145"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user