Refactor services widget with unified system metrics display

- Rename alerts widget to hosts widget for clarity
- Add sub_service field to ServiceInfo for display differentiation
- Integrate system metrics (CPU load, memory, temperature, disk) as service rows
- Convert nginx sites to individual sub-service rows with tree structure
- Remove nginx site checkmarks - status now shown via row indicators
- Update dashboard layout to display system and service data together
- Maintain description lines for connection counts and service details

Services widget now shows:
- System metrics as regular service rows with status
- Nginx sites as sub-services with ├─/└─ tree formatting (see the sketch below)
- Regular services with full resource data and descriptions
- Unified status indication across all row types
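
For illustration, a hypothetical rendering of the unified widget (names, values, and status glyphs are invented for this sketch) could look like:

    CPU Load             [ok]        load 0.42
    Service Memory       [ok]        3120 / 15953 MB
    CPU Temperature      [ok]        54.0 °C
    nginx                [running]
    ├─ example.org       [running]
    └─ api.example.org   [running]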
2025-10-13 08:10:38 +02:00
parent c68ccf023e
commit bab387c74d
7 changed files with 419 additions and 89 deletions


@@ -114,6 +114,7 @@ impl ServiceCollector {
sandbox_limit: None, // TODO: Implement sandbox limit detection
disk_used_gb,
description,
sub_service: false,
})
}
@@ -404,6 +405,226 @@ impl ServiceCollector {
}
}
async fn add_system_metrics_as_services(&self, services: &mut Vec<ServiceData>, healthy: &mut usize, degraded: &mut usize, failed: &mut usize) {
// Get system metrics
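// Each metric below is appended as a pseudo service row so it renders alongside real services;
// the healthy/degraded/failed counters are updated so system metrics count toward the overall totals.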
if let Ok((load_1, _load_5, _load_15)) = self.get_cpu_load().await {
let cpu_status = self.determine_cpu_status(load_1);
match cpu_status.as_str() {
"ok" => *healthy += 1,
"warning" => *degraded += 1,
"critical" => *failed += 1,
_ => *degraded += 1,
}
services.push(ServiceData {
name: "CPU Load".to_string(),
status: match cpu_status.as_str() {
"ok" => ServiceStatus::Running,
"warning" => ServiceStatus::Degraded,
"critical" => ServiceStatus::Stopped,
_ => ServiceStatus::Degraded,
},
memory_used_mb: 0.0,
memory_quota_mb: 0.0,
cpu_percent: load_1,
sandbox_limit: None,
disk_used_gb: 0.0,
description: None,
sub_service: false,
});
}
if let Ok((memory_used_mb, memory_total_mb)) = self.get_memory_info().await {
let memory_status = self.determine_memory_status(memory_used_mb, memory_total_mb);
match memory_status.as_str() {
"ok" => *healthy += 1,
"warning" => *degraded += 1,
"critical" => *failed += 1,
_ => *degraded += 1,
}
services.push(ServiceData {
name: "Service Memory".to_string(),
status: match memory_status.as_str() {
"ok" => ServiceStatus::Running,
"warning" => ServiceStatus::Degraded,
"critical" => ServiceStatus::Stopped,
_ => ServiceStatus::Degraded,
},
memory_used_mb,
memory_quota_mb: memory_total_mb,
cpu_percent: 0.0,
sandbox_limit: None,
disk_used_gb: 0.0,
description: None,
sub_service: false,
});
}
if let Some(cpu_temp) = self.get_cpu_temperature().await {
let temp_status = self.determine_cpu_temp_status(cpu_temp);
match temp_status.as_str() {
"ok" => *healthy += 1,
"warning" => *degraded += 1,
"critical" => *failed += 1,
_ => *degraded += 1,
}
services.push(ServiceData {
name: "CPU Temperature".to_string(),
status: match temp_status.as_str() {
"ok" => ServiceStatus::Running,
"warning" => ServiceStatus::Degraded,
"critical" => ServiceStatus::Stopped,
_ => ServiceStatus::Degraded,
},
memory_used_mb: 0.0,
memory_quota_mb: 0.0,
cpu_percent: cpu_temp,
sandbox_limit: None,
disk_used_gb: 0.0,
description: None,
sub_service: false,
});
}
if let Ok(disk_usage) = self.get_disk_usage().await {
services.push(ServiceData {
name: "Disk Usage".to_string(),
status: ServiceStatus::Running,
memory_used_mb: 0.0,
memory_quota_mb: 0.0,
cpu_percent: 0.0,
sandbox_limit: None,
disk_used_gb: disk_usage.used_gb,
description: None,
sub_service: false,
});
*healthy += 1;
}
}
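// Illustrative sketch, not part of this change: the "ok"/"warning"/"critical" to ServiceStatus
// mapping above is repeated for every metric; a hypothetical helper could centralize it.
fn status_from_str(status: &str) -> ServiceStatus {
    match status {
        "ok" => ServiceStatus::Running,
        "warning" => ServiceStatus::Degraded,
        "critical" => ServiceStatus::Stopped,
        // Unknown strings are treated as degraded, matching the inline matches above.
        _ => ServiceStatus::Degraded,
    }
}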
async fn get_cpu_load(&self) -> Result<(f32, f32, f32), CollectorError> {
let output = tokio::process::Command::new("uptime")
.output()
.await
.map_err(|e| CollectorError::CommandFailed {
command: "uptime".to_string(),
message: e.to_string()
})?;
let uptime_str = String::from_utf8_lossy(&output.stdout);
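// Typical uptime output: " 10:02:33 up 4 days,  1:37,  2 users,  load average: 0.52, 0.58, 0.59".
// Some locales print decimal commas, hence the replace(',', ".") before parsing below.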
if let Some(load_part) = uptime_str.split("load average:").nth(1) {
let load_str = load_part.trim();
let loads: Vec<&str> = load_str.split(", ").collect();
if loads.len() >= 3 {
let load_1 = loads[0].trim().replace(',', ".").parse::<f32>()
.map_err(|_| CollectorError::ParseError { message: "Failed to parse 1min load".to_string() })?;
let load_5 = loads[1].trim().replace(',', ".").parse::<f32>()
.map_err(|_| CollectorError::ParseError { message: "Failed to parse 5min load".to_string() })?;
let load_15 = loads[2].trim().replace(',', ".").parse::<f32>()
.map_err(|_| CollectorError::ParseError { message: "Failed to parse 15min load".to_string() })?;
return Ok((load_1, load_5, load_15));
}
}
Err(CollectorError::ParseError { message: "Failed to parse load averages".to_string() })
}
async fn get_cpu_temperature(&self) -> Option<f32> {
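// First pass: prefer CPU package/core thermal zones (e.g. "x86_pkg_temp") so unrelated ACPI or board zones are not picked up by default.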
for i in 0..10 {
let type_path = format!("/sys/class/thermal/thermal_zone{}/type", i);
let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", i);
if let (Ok(zone_type), Ok(temp_str)) = (
fs::read_to_string(&type_path).await,
fs::read_to_string(&temp_path).await,
) {
let zone_type = zone_type.trim();
if let Ok(temp_millic) = temp_str.trim().parse::<f32>() {
let temp_c = temp_millic / 1000.0;
if temp_c > 20.0 && temp_c < 150.0 {
if zone_type == "x86_pkg_temp" || zone_type.contains("coretemp") {
return Some(temp_c);
}
}
}
}
}
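// Second pass: fall back to any thermal zone reporting a plausible temperature.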
for i in 0..10 {
let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", i);
if let Ok(temp_str) = fs::read_to_string(&temp_path).await {
if let Ok(temp_millic) = temp_str.trim().parse::<f32>() {
let temp_c = temp_millic / 1000.0;
if temp_c > 20.0 && temp_c < 150.0 {
return Some(temp_c);
}
}
}
}
None
}
async fn get_memory_info(&self) -> Result<(f32, f32), CollectorError> {
let meminfo = fs::read_to_string("/proc/meminfo")
.await
.map_err(|e| CollectorError::IoError { message: format!("Failed to read /proc/meminfo: {}", e) })?;
let mut mem_total = None;
let mut mem_available = None;
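// /proc/meminfo lines look like "MemTotal:       16303424 kB"; values are in kB and converted to MB below.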
for line in meminfo.lines() {
if let Some(value_str) = line.strip_prefix("MemTotal:") {
if let Some(kb_str) = value_str.trim().split_whitespace().next() {
mem_total = kb_str.parse::<f32>().ok().map(|kb| kb / 1024.0);
}
} else if let Some(value_str) = line.strip_prefix("MemAvailable:") {
if let Some(kb_str) = value_str.trim().split_whitespace().next() {
mem_available = kb_str.parse::<f32>().ok().map(|kb| kb / 1024.0);
}
}
}
match (mem_total, mem_available) {
(Some(total), Some(available)) => {
let used = total - available;
Ok((used, total))
}
_ => Err(CollectorError::ParseError { message: "Failed to parse memory info".to_string() }),
}
}
fn determine_cpu_status(&self, load_1: f32) -> String {
if load_1 >= 8.0 {
"critical".to_string()
} else if load_1 >= 5.0 {
"warning".to_string()
} else {
"ok".to_string()
}
}
fn determine_memory_status(&self, used_mb: f32, total_mb: f32) -> String {
let usage_percent = (used_mb / total_mb) * 100.0;
if usage_percent >= 95.0 {
"critical".to_string()
} else if usage_percent >= 80.0 {
"warning".to_string()
} else {
"ok".to_string()
}
}
fn determine_cpu_temp_status(&self, temp_c: f32) -> String {
if temp_c >= 100.0 {
"critical".to_string()
} else if temp_c >= 90.0 {
"warning".to_string()
} else {
"ok".to_string()
}
}
async fn get_service_description_with_cache(&self, service: &str) -> Option<Vec<String>> {
// Check if we should update the cache (throttled)
let should_update = self.should_update_description(service).await;
@@ -676,7 +897,7 @@ impl ServiceCollector {
let mut accessible_sites = Vec::new();
for site in sites {
if self.check_site_accessibility(&site).await {
accessible_sites.push(site); // Remove checkmark - status will be shown via sub_service row status
}
}
@@ -791,6 +1012,9 @@ impl Collector for ServiceCollector {
let mut total_memory_quota = 0.0;
let mut total_disk_used = 0.0;
// Add system metrics as service entries first
self.add_system_metrics_as_services(&mut services, &mut healthy, &mut degraded, &mut failed).await;
// Collect data from all configured services
for service in &self.services {
match self.get_service_status(service).await {
@@ -807,7 +1031,39 @@ impl Collector for ServiceCollector {
}
total_disk_used += service_data.disk_used_gb;
// Handle nginx specially - create sub-services for sites
if service == "nginx" && matches!(service_data.status, ServiceStatus::Running) {
// Clear nginx description - sites will become individual sub-services
let mut nginx_service = service_data;
nginx_service.description = None;
services.push(nginx_service);
// Add nginx sites as individual sub-services
if let Some(sites) = self.get_nginx_sites().await {
for (i, site) in sites.iter().enumerate() {
let site_name = if i == sites.len() - 1 {
format!("└─ {}", site)
} else {
format!("├─ {}", site)
};
services.push(ServiceData {
name: site_name,
status: ServiceStatus::Running, // Assume sites are running if nginx is running
memory_used_mb: 0.0,
memory_quota_mb: 0.0,
cpu_percent: 0.0,
sandbox_limit: None,
disk_used_gb: 0.0,
description: None,
sub_service: true,
});
healthy += 1;
}
}
} else {
services.push(service_data);
}
}
Err(e) => {
failed += 1;
@@ -821,6 +1077,7 @@ impl Collector for ServiceCollector {
sandbox_limit: None,
disk_used_gb: 0.0,
description: None,
sub_service: false,
});
tracing::warn!("Failed to collect metrics for service {}: {}", service, e);
}
@@ -879,6 +1136,8 @@ struct ServiceData {
disk_used_gb: f32,
#[serde(skip_serializing_if = "Option::is_none")]
description: Option<Vec<String>>,
#[serde(default)]
sub_service: bool,
}
#[derive(Debug, Clone, Serialize)]