This commit is contained in:
Christoffer Martinsson 2025-10-13 08:31:18 +02:00
parent 3de1e0db19
commit 5e8a0ce108
4 changed files with 33 additions and 231 deletions

View File

@ -405,225 +405,6 @@ impl ServiceCollector {
}
}
async fn add_system_metrics_as_services(&self, services: &mut Vec<ServiceData>, healthy: &mut usize, degraded: &mut usize, failed: &mut usize) {
// Get system metrics
if let Ok((load_1, _load_5, _load_15)) = self.get_cpu_load().await {
let cpu_status = self.determine_cpu_status(load_1);
match cpu_status.as_str() {
"ok" => *healthy += 1,
"warning" => *degraded += 1,
"critical" => *failed += 1,
_ => *degraded += 1,
}
services.push(ServiceData {
name: "CPU Load".to_string(),
status: match cpu_status.as_str() {
"ok" => ServiceStatus::Running,
"warning" => ServiceStatus::Degraded,
"critical" => ServiceStatus::Stopped,
_ => ServiceStatus::Degraded,
},
memory_used_mb: 0.0,
memory_quota_mb: 0.0,
cpu_percent: load_1,
sandbox_limit: None,
disk_used_gb: 0.0,
description: None,
sub_service: false,
});
}
if let Ok((memory_used_mb, memory_total_mb)) = self.get_memory_info().await {
let memory_status = self.determine_memory_status(memory_used_mb, memory_total_mb);
match memory_status.as_str() {
"ok" => *healthy += 1,
"warning" => *degraded += 1,
"critical" => *failed += 1,
_ => *degraded += 1,
}
services.push(ServiceData {
name: "Service Memory".to_string(),
status: match memory_status.as_str() {
"ok" => ServiceStatus::Running,
"warning" => ServiceStatus::Degraded,
"critical" => ServiceStatus::Stopped,
_ => ServiceStatus::Degraded,
},
memory_used_mb,
memory_quota_mb: memory_total_mb,
cpu_percent: 0.0,
sandbox_limit: None,
disk_used_gb: 0.0,
description: None,
sub_service: false,
});
}
if let Some(cpu_temp) = self.get_cpu_temperature().await {
let temp_status = self.determine_cpu_temp_status(cpu_temp);
match temp_status.as_str() {
"ok" => *healthy += 1,
"warning" => *degraded += 1,
"critical" => *failed += 1,
_ => *degraded += 1,
}
services.push(ServiceData {
name: "CPU Temperature".to_string(),
status: match temp_status.as_str() {
"ok" => ServiceStatus::Running,
"warning" => ServiceStatus::Degraded,
"critical" => ServiceStatus::Stopped,
_ => ServiceStatus::Degraded,
},
memory_used_mb: 0.0,
memory_quota_mb: 0.0,
cpu_percent: cpu_temp,
sandbox_limit: None,
disk_used_gb: 0.0,
description: None,
sub_service: false,
});
}
if let Ok(disk_usage) = self.get_disk_usage().await {
services.push(ServiceData {
name: "Disk Usage".to_string(),
status: ServiceStatus::Running,
memory_used_mb: 0.0,
memory_quota_mb: 0.0,
cpu_percent: 0.0,
sandbox_limit: None,
disk_used_gb: disk_usage.used_gb,
description: None,
sub_service: false,
});
*healthy += 1;
}
}
/// Runs `uptime` and returns the `(1min, 5min, 15min)` load averages parsed
/// from the text following `load average:`.
///
/// # Errors
/// * `CollectorError::CommandFailed` if the `uptime` process cannot be run.
/// * `CollectorError::ParseError` if the marker is missing, fewer than three
///   values follow it, or a value is not a number.
async fn get_cpu_load(&self) -> Result<(f32, f32, f32), CollectorError> {
    // Parses one load value; a decimal comma (as printed under some locales)
    // is converted to a dot first.
    fn parse_load(raw: &str, failure: &str) -> Result<f32, CollectorError> {
        raw.trim()
            .replace(',', ".")
            .parse::<f32>()
            .map_err(|_| CollectorError::ParseError { message: failure.to_string() })
    }

    let output = tokio::process::Command::new("uptime")
        .output()
        .await
        .map_err(|err| CollectorError::CommandFailed {
            command: "uptime".to_string(),
            message: err.to_string(),
        })?;
    let stdout = String::from_utf8_lossy(&output.stdout);

    // Values are separated by ", " so that a decimal comma inside a value
    // does not split it.
    let first_three = stdout.split("load average:").nth(1).and_then(|tail| {
        let mut fields = tail.trim().split(", ");
        match (fields.next(), fields.next(), fields.next()) {
            (Some(one), Some(five), Some(fifteen)) => Some((one, five, fifteen)),
            _ => None,
        }
    });

    match first_three {
        Some((one, five, fifteen)) => {
            let load_1 = parse_load(one, "Failed to parse 1min load")?;
            let load_5 = parse_load(five, "Failed to parse 5min load")?;
            let load_15 = parse_load(fifteen, "Failed to parse 15min load")?;
            Ok((load_1, load_5, load_15))
        }
        None => Err(CollectorError::ParseError { message: "Failed to parse load averages".to_string() }),
    }
}
async fn get_cpu_temperature(&self) -> Option<f32> {
for i in 0..10 {
let type_path = format!("/sys/class/thermal/thermal_zone{}/type", i);
let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", i);
if let (Ok(zone_type), Ok(temp_str)) = (
fs::read_to_string(&type_path).await,
fs::read_to_string(&temp_path).await,
) {
let zone_type = zone_type.trim();
if let Ok(temp_millic) = temp_str.trim().parse::<f32>() {
let temp_c = temp_millic / 1000.0;
if temp_c > 20.0 && temp_c < 150.0 {
if zone_type == "x86_pkg_temp" || zone_type.contains("coretemp") {
return Some(temp_c);
}
}
}
}
}
for i in 0..10 {
let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", i);
if let Ok(temp_str) = fs::read_to_string(&temp_path).await {
if let Ok(temp_millic) = temp_str.trim().parse::<f32>() {
let temp_c = temp_millic / 1000.0;
if temp_c > 20.0 && temp_c < 150.0 {
return Some(temp_c);
}
}
}
}
None
}
/// Reads `/proc/meminfo` and returns `(used, total)`, where `used` is
/// `MemTotal - MemAvailable` and both figures are the file's values divided
/// by 1024 (the unit suffix in the file is not inspected).
///
/// # Errors
/// * `CollectorError::IoError` if `/proc/meminfo` cannot be read.
/// * `CollectorError::ParseError` if either field ends up without a numeric value.
async fn get_memory_info(&self) -> Result<(f32, f32), CollectorError> {
    // Converts the first whitespace-separated token after a field label.
    // Outer `None`: there is no token, so the caller keeps its previous value.
    // Inner `None`: a token exists but is not a number.
    fn first_token_scaled(rest: &str) -> Option<Option<f32>> {
        rest.split_whitespace()
            .next()
            .map(|token| token.parse::<f32>().ok().map(|kb| kb / 1024.0))
    }

    let contents = fs::read_to_string("/proc/meminfo")
        .await
        .map_err(|err| CollectorError::IoError {
            message: format!("Failed to read /proc/meminfo: {}", err),
        })?;

    let mut total = None;
    let mut available = None;
    for row in contents.lines() {
        // Pick the variable this row feeds, or skip rows we do not care about.
        let (slot, rest) = if let Some(rest) = row.strip_prefix("MemTotal:") {
            (&mut total, rest)
        } else if let Some(rest) = row.strip_prefix("MemAvailable:") {
            (&mut available, rest)
        } else {
            continue;
        };
        if let Some(parsed) = first_token_scaled(rest) {
            *slot = parsed;
        }
    }

    match total.zip(available) {
        Some((total, available)) => Ok((total - available, total)),
        None => Err(CollectorError::ParseError { message: "Failed to parse memory info".to_string() }),
    }
}
/// Maps a 1-minute load average to a status label: `"critical"` at 8.0 or
/// above, `"warning"` at 5.0 or above, otherwise `"ok"`.
fn determine_cpu_status(&self, load_1: f32) -> String {
    let label = match load_1 {
        load if load >= 8.0 => "critical",
        load if load >= 5.0 => "warning",
        _ => "ok",
    };
    label.to_string()
}
/// Maps memory usage to a status label based on `used_mb / total_mb` as a
/// percentage: `"critical"` at 95% or above, `"warning"` at 80% or above,
/// otherwise `"ok"`.
///
/// No guard is applied for `total_mb == 0.0`; the result then follows from
/// floating-point division (a NaN ratio compares false and yields `"ok"`).
fn determine_memory_status(&self, used_mb: f32, total_mb: f32) -> String {
    let percent_used = (used_mb / total_mb) * 100.0;
    let label = match percent_used {
        pct if pct >= 95.0 => "critical",
        pct if pct >= 80.0 => "warning",
        _ => "ok",
    };
    label.to_string()
}
/// Maps a CPU temperature reading to a status label: `"critical"` at or above
/// the critical threshold, `"warning"` at or above the warning threshold,
/// otherwise `"ok"`.
///
/// `temp_c` is presumably degrees Celsius (going by the parameter name).
fn determine_cpu_temp_status(&self, temp_c: f32) -> String {
    const CRITICAL_TEMP_C: f32 = 100.0;
    // NOTE(review): this threshold was previously 100.0 as well, identical to
    // the critical one, so the "warning" branch could never be taken. 80.0 is
    // a conventional early-warning value; confirm it suits the target hardware.
    const WARNING_TEMP_C: f32 = 80.0;

    if temp_c >= CRITICAL_TEMP_C {
        "critical".to_string()
    } else if temp_c >= WARNING_TEMP_C {
        "warning".to_string()
    } else {
        "ok".to_string()
    }
}
async fn get_service_description_with_cache(&self, service: &str) -> Option<Vec<String>> {
// Check if we should update the cache (throttled)
@ -1012,9 +793,6 @@ impl Collector for ServiceCollector {
let mut total_memory_quota = 0.0;
let mut total_disk_used = 0.0;
// Add system metrics as service entries first
self.add_system_metrics_as_services(&mut services, &mut healthy, &mut degraded, &mut failed).await;
// Collect data from all configured services
for service in &self.services {
match self.get_service_status(service).await {

View File

@ -336,7 +336,7 @@ impl App {
fn check_host_timeouts(&mut self) {
let now = Utc::now();
for (host_name, state) in self.host_states.iter_mut() {
for (_host_name, state) in self.host_states.iter_mut() {
if let Some(last_success) = state.last_success {
let duration_since_last = now.signed_duration_since(last_success);

View File

@ -69,12 +69,42 @@ fn render_metrics(
return;
}
let mut services = metrics.services.clone();
services.sort_by(|a, b| {
// Sort services but preserve nginx + sites grouping
let mut services = Vec::new();
let mut regular_services = Vec::new();
let mut nginx_group = Vec::new();
let mut in_nginx_group = false;
for svc in metrics.services.iter() {
if svc.name == "nginx" {
// Start nginx group
in_nginx_group = true;
nginx_group.push(svc.clone());
} else if in_nginx_group && svc.sub_service {
// Add nginx site to group
nginx_group.push(svc.clone());
} else {
// End nginx group if we were in one
if in_nginx_group {
in_nginx_group = false;
services.append(&mut nginx_group);
}
regular_services.push(svc.clone());
}
}
// Add any remaining nginx group
if !nginx_group.is_empty() {
services.append(&mut nginx_group);
}
// Sort regular services and add them
regular_services.sort_by(|a, b| {
status_weight(&a.status)
.cmp(&status_weight(&b.status))
.then_with(|| a.name.cmp(&b.name))
});
services.append(&mut regular_services);
for svc in services {
let status_level = match svc.status {

View File

@ -166,9 +166,3 @@ fn format_optional_metric(value: Option<f32>, unit: &str) -> String {
}
}
/// Formats an optional value as a percentage with no decimal places
/// (e.g. `Some(42.4)` becomes `"42%"`); `None` becomes an empty string.
fn format_optional_percent(value: Option<f32>) -> String {
    value
        .map(|pct| format!("{:.0}%", pct))
        .unwrap_or_default()
}