This commit is contained in:
Christoffer Martinsson 2025-10-12 18:03:32 +02:00
parent 49aee702f2
commit b226217ba4
7 changed files with 28 additions and 67 deletions

View File

@ -514,7 +514,6 @@ impl ServiceCollector {
let should_update = self.should_update_description(service).await;
if should_update {
tracing::debug!("Service {} updating description (throttle check passed)", service);
if let Some(new_description) = self.get_service_description(service).await {
// Update cache
let mut cache = self.description_cache.lock().await;
@ -546,16 +545,11 @@ impl ServiceCollector {
let should_update = (now + service_offset) % update_interval == 0;
if !should_update {
let next_update = update_interval - ((now + service_offset) % update_interval);
tracing::trace!("Service {} throttled, next update in {} seconds", service, next_update);
}
should_update
}
async fn get_service_description(&self, service: &str) -> Option<Vec<String>> {
tracing::debug!("Getting description for service: {}", service);
let result = match service {
"sshd" | "ssh" => self.get_ssh_active_users().await.map(|s| vec![s]),
"nginx" => self.get_nginx_sites().await,
@ -566,11 +560,6 @@ impl ServiceCollector {
_ => None,
};
match &result {
Some(descriptions) => tracing::info!("Service {} got {} descriptions: {:?}", service, descriptions.len(), descriptions),
None => tracing::debug!("Service {} got no description", service),
}
result
}
@ -712,16 +701,11 @@ impl ServiceCollector {
}
async fn get_nginx_sites(&self) -> Option<Vec<String>> {
tracing::debug!("Starting nginx site detection");
// Get the actual nginx config file path from systemd (NixOS uses custom config)
let config_path = match self.get_nginx_config_from_systemd().await {
Some(path) => {
tracing::debug!("Found nginx config path from systemd: {}", path);
path
}
Some(path) => path,
None => {
tracing::warn!("Could not find nginx config path from systemd, using default");
// Fallback to default nginx -T
match Command::new("sudo")
.args(["nginx", "-T"])
@ -732,15 +716,12 @@ impl ServiceCollector {
{
Ok(output) => {
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
tracing::warn!("nginx -T failed with status {}: {}", output.status, stderr);
return None;
}
let config = String::from_utf8_lossy(&output.stdout);
return self.parse_nginx_config(&config).await;
}
Err(e) => {
tracing::warn!("Failed to execute sudo nginx -T: {}", e);
Err(_) => {
return None;
}
}
@ -756,20 +737,16 @@ impl ServiceCollector {
.await
{
Ok(output) => output,
Err(e) => {
tracing::warn!("Failed to execute sudo nginx -T -c {}: {}", config_path, e);
Err(_) => {
return None;
}
};
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
tracing::warn!("nginx -T failed with status {}: {}", output.status, stderr);
return None;
}
let config = String::from_utf8_lossy(&output.stdout);
tracing::debug!("Got nginx config, {} bytes", config.len());
self.parse_nginx_config(&config).await
}
@ -824,7 +801,6 @@ impl ServiceCollector {
i += 1;
}
tracing::info!("Found {} potential sites, checking accessibility", sites.len());
// Check which sites are actually accessible
let mut accessible_sites = Vec::new();
@ -837,10 +813,7 @@ impl ServiceCollector {
// Limit to reasonable number
accessible_sites.truncate(15);
tracing::info!("Final accessible nginx sites: {:?}", accessible_sites);
if accessible_sites.is_empty() {
tracing::warn!("No accessible nginx sites found");
None
} else {
Some(accessible_sites)
@ -913,7 +886,6 @@ impl ServiceCollector {
response.contains(" 302 ") ||
response.contains(" 403 ") // Some sites return 403 but are still "accessible"
) {
tracing::debug!("Site {} accessible via {}", hostname, scheme);
return true;
}
}
@ -921,7 +893,6 @@ impl ServiceCollector {
}
}
tracing::debug!("Site {} not accessible", hostname);
false
}
}

View File

@ -83,7 +83,7 @@ impl Default for AgentConfig {
},
service: ServiceCollectorConfig {
enabled: true,
interval_ms: 2000,
interval_ms: 5000,
services: vec![
"gitea".to_string(),
"immich".to_string(),
@ -263,18 +263,18 @@ impl AgentConfig {
fn apply_host_timing_overrides(&mut self, hostname: &str) {
match hostname {
"srv01" => {
// Server host - more frequent monitoring
self.collectors.service.interval_ms = 1000;
// Server host - standard 5 second monitoring
self.collectors.service.interval_ms = 5000;
self.collectors.smart.interval_ms = 5000;
}
"cmbox" | "labbox" | "simonbox" | "steambox" => {
// Workstation hosts - less frequent monitoring
self.collectors.smart.interval_ms = 10000;
// Workstation hosts - standard 5 second monitoring
self.collectors.smart.interval_ms = 5000;
self.collectors.service.interval_ms = 5000;
}
_ => {
// Unknown host - conservative defaults
self.collectors.smart.interval_ms = 10000;
// Unknown host - standard 5 second monitoring
self.collectors.smart.interval_ms = 5000;
self.collectors.service.interval_ms = 5000;
}
}

View File

@ -40,7 +40,7 @@ timeout_ms = 30000
enabled = true
# Collection interval in milliseconds (minimum 500ms)
interval_ms = 2000
interval_ms = 5000
# List of systemd services to monitor
services = [

View File

@ -20,10 +20,7 @@ pub fn render(frame: &mut Frame, hosts: &[HostDisplayData], area: Rect) {
color = Color::Gray;
}
let title = format!(
"Alerts • ok:{} warn:{} fail:{}",
ok_count, warn_count, fail_count
);
let title = "Alerts".to_string();
let widget_status = match severity {
AlertSeverity::Critical => StatusLevel::Error,

View File

@ -32,10 +32,7 @@ fn render_metrics(
) {
let summary = &metrics.summary;
let color = summary_color(summary);
let title = format!(
"Services • ok:{} warn:{} fail:{}",
summary.healthy, summary.degraded, summary.failed
);
let title = "Services".to_string();
let widget_status = if summary.failed > 0 {
StatusLevel::Error
@ -48,7 +45,7 @@ fn render_metrics(
let mut data = WidgetData::new(
title,
Some(WidgetStatus::new(widget_status)),
vec!["Service".to_string(), "Memory".to_string(), "CPU".to_string(), "Disk".to_string()]
vec!["Service".to_string(), "Memory (GB)".to_string(), "CPU".to_string(), "Disk".to_string()]
);
@ -133,35 +130,34 @@ fn summary_color(summary: &ServiceSummary) -> Color {
}
/// Formats a service's memory usage for the services table.
///
/// Both inputs are divided by 1000 and rendered with one decimal and a `GB`
/// suffix. NOTE(review): this presumes `used` and `quota` arrive in
/// megabyte-scale units — confirm against the collector that fills them.
///
/// * `quota > 0.05`  -> `"<used>/<quota> GB"`
/// * `used > 0.05`   -> `"<used> GB"` (no meaningful quota)
/// * otherwise       -> `"0.0 GB"`, so the column is never left blank
fn format_memory_value(used: f32, quota: f32) -> String {
    let used_gb = used / 1000.0;
    let quota_gb = quota / 1000.0;

    // The thresholds are applied to the raw inputs, not the converted values.
    if quota > 0.05 {
        format!("{:.1}/{:.1} GB", used_gb, quota_gb)
    } else if used > 0.05 {
        format!("{:.1} GB", used_gb)
    } else {
        "0.0 GB".to_string()
    }
}
/// Formats a CPU usage percentage for the services table.
///
/// * `cpu_percent >= 0.1` -> the value with one decimal, e.g. `"12.3%"`
/// * `0.0 < cpu_percent < 0.1` -> `"<0.1%"`, so tiny non-zero load stays
///   distinguishable from idle
/// * otherwise (zero, negative, NaN) -> `"0.0%"`, so the column is never blank
fn format_cpu_value(cpu_percent: f32) -> String {
    if cpu_percent >= 0.1 {
        format!("{:.1}%", cpu_percent)
    } else if cpu_percent > 0.0 {
        "<0.1%".to_string()
    } else {
        "0.0%".to_string()
    }
}
/// Formats a service's disk usage for the services table.
///
/// `used` is treated as a gigabyte-scale value: at or above `1.0` it is shown
/// as-is with one decimal and a `GB` suffix; below that it is multiplied by
/// 1000 and shown as whole `MB`.
///
/// * `used >= 1.0`   -> `"<used> GB"`
/// * `used >= 0.001` -> `"<used * 1000> MB"` (no decimals)
/// * otherwise       -> `"<1 MB"`, so the column is never left blank
fn format_disk_value(used: f32) -> String {
    if used >= 1.0 {
        format!("{:.1} GB", used)
    } else if used >= 0.001 {
        // At least one megabyte: switch to the smaller unit.
        format!("{:.0} MB", used * 1000.0)
    } else {
        "<1 MB".to_string()
    }
}

View File

@ -26,10 +26,7 @@ pub fn render(frame: &mut Frame, host: Option<&HostDisplayData>, area: Rect) {
fn render_metrics(frame: &mut Frame, _host: &HostDisplayData, metrics: &SmartMetrics, area: Rect) {
let color = smart_status_color(&metrics.status);
let title = format!(
"Storage • ok:{} warn:{} crit:{}",
metrics.summary.healthy, metrics.summary.warning, metrics.summary.critical
);
let title = "Storage".to_string();
let widget_status = if metrics.summary.critical > 0 {
StatusLevel::Error

View File

@ -78,7 +78,7 @@ fn render_metrics(
memory_dataset.add_row(
Some(WidgetStatus::new(memory_status)),
vec![],
vec![format!("{:.1} / {:.1}", system_used, system_total)],
vec![format!("{:.1} / {:.1} GB", system_used / 1000.0, system_total / 1000.0)],
);
// CPU dataset
@ -121,7 +121,7 @@ fn render_metrics(
let overall_status = Some(WidgetStatus::new(overall_status_level));
// Render all three datasets in a single combined widget
render_combined_widget_data(frame, area, "CPU / Memory".to_string(), overall_status, vec![memory_dataset, cpu_dataset, gpu_dataset]);
render_combined_widget_data(frame, area, "System".to_string(), overall_status, vec![memory_dataset, cpu_dataset, gpu_dataset]);
}
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]