Refactor services widget with unified system metrics display
- Rename alerts widget to hosts widget for clarity
- Add sub_service field to ServiceInfo for display differentiation
- Integrate system metrics (CPU load, memory, temperature, disk) as service rows
- Convert nginx sites to individual sub-service rows with tree structure
- Remove nginx site checkmarks - status now shown via row indicators
- Update dashboard layout to display system and service data together
- Maintain description lines for connection counts and service details

Services widget now shows:
- System metrics as regular service rows with status
- Nginx sites as sub-services with ├─/└─ tree formatting
- Regular services with full resource data and descriptions
- Unified status indication across all row types
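As a rough sketch of the resulting layout (service names, readings, and status glyphs are hypothetical), the unified widget renders rows along these lines:

    ● CPU Load          0.52
    ● Service Memory    9.1 GB / 15.9 GB
    ● CPU Temperature   45.0 °C
    ● Disk Usage        112 GB
    ● nginx
      ├─ example.com
      └─ blog.example.com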
@@ -114,6 +114,7 @@ impl ServiceCollector {
            sandbox_limit: None, // TODO: Implement sandbox limit detection
            disk_used_gb,
            description,
            sub_service: false,
        })
    }
@@ -404,6 +405,226 @@ impl ServiceCollector {
        }
    }

    async fn add_system_metrics_as_services(&self, services: &mut Vec<ServiceData>, healthy: &mut usize, degraded: &mut usize, failed: &mut usize) {
        // CPU load: shown as a pseudo-service row, thresholded on the 1-minute average
        if let Ok((load_1, _load_5, _load_15)) = self.get_cpu_load().await {
            let cpu_status = self.determine_cpu_status(load_1);
            match cpu_status.as_str() {
                "ok" => *healthy += 1,
                "warning" => *degraded += 1,
                "critical" => *failed += 1,
                _ => *degraded += 1,
            }
            services.push(ServiceData {
                name: "CPU Load".to_string(),
                status: match cpu_status.as_str() {
                    "ok" => ServiceStatus::Running,
                    "warning" => ServiceStatus::Degraded,
                    "critical" => ServiceStatus::Stopped,
                    _ => ServiceStatus::Degraded,
                },
                memory_used_mb: 0.0,
                memory_quota_mb: 0.0,
                cpu_percent: load_1,
                sandbox_limit: None,
                disk_used_gb: 0.0,
                description: None,
                sub_service: false,
            });
        }

        // Memory: thresholded on used/total percentage
        if let Ok((memory_used_mb, memory_total_mb)) = self.get_memory_info().await {
            let memory_status = self.determine_memory_status(memory_used_mb, memory_total_mb);
            match memory_status.as_str() {
                "ok" => *healthy += 1,
                "warning" => *degraded += 1,
                "critical" => *failed += 1,
                _ => *degraded += 1,
            }
            services.push(ServiceData {
                name: "Service Memory".to_string(),
                status: match memory_status.as_str() {
                    "ok" => ServiceStatus::Running,
                    "warning" => ServiceStatus::Degraded,
                    "critical" => ServiceStatus::Stopped,
                    _ => ServiceStatus::Degraded,
                },
                memory_used_mb,
                memory_quota_mb: memory_total_mb,
                cpu_percent: 0.0,
                sandbox_limit: None,
                disk_used_gb: 0.0,
                description: None,
                sub_service: false,
            });
        }

        // CPU temperature: only added when a usable thermal zone is found
        if let Some(cpu_temp) = self.get_cpu_temperature().await {
            let temp_status = self.determine_cpu_temp_status(cpu_temp);
            match temp_status.as_str() {
                "ok" => *healthy += 1,
                "warning" => *degraded += 1,
                "critical" => *failed += 1,
                _ => *degraded += 1,
            }
            services.push(ServiceData {
                name: "CPU Temperature".to_string(),
                status: match temp_status.as_str() {
                    "ok" => ServiceStatus::Running,
                    "warning" => ServiceStatus::Degraded,
                    "critical" => ServiceStatus::Stopped,
                    _ => ServiceStatus::Degraded,
                },
                memory_used_mb: 0.0,
                memory_quota_mb: 0.0,
                cpu_percent: cpu_temp, // temperature reading carried in the cpu_percent field
                sandbox_limit: None,
                disk_used_gb: 0.0,
                description: None,
                sub_service: false,
            });
        }

        // Disk usage: informational only, always counted as healthy
        if let Ok(disk_usage) = self.get_disk_usage().await {
            services.push(ServiceData {
                name: "Disk Usage".to_string(),
                status: ServiceStatus::Running,
                memory_used_mb: 0.0,
                memory_quota_mb: 0.0,
                cpu_percent: 0.0,
                sandbox_limit: None,
                disk_used_gb: disk_usage.used_gb,
                description: None,
                sub_service: false,
            });
            *healthy += 1;
        }
    }

    async fn get_cpu_load(&self) -> Result<(f32, f32, f32), CollectorError> {
        let output = tokio::process::Command::new("uptime")
            .output()
            .await
            .map_err(|e| CollectorError::CommandFailed {
                command: "uptime".to_string(),
                message: e.to_string(),
            })?;

        let uptime_str = String::from_utf8_lossy(&output.stdout);

        if let Some(load_part) = uptime_str.split("load average:").nth(1) {
            let load_str = load_part.trim();
            let loads: Vec<&str> = load_str.split(", ").collect();
            if loads.len() >= 3 {
                // Some locales print decimal commas; normalize them before parsing
                let load_1 = loads[0].trim().replace(',', ".").parse::<f32>()
                    .map_err(|_| CollectorError::ParseError { message: "Failed to parse 1min load".to_string() })?;
                let load_5 = loads[1].trim().replace(',', ".").parse::<f32>()
                    .map_err(|_| CollectorError::ParseError { message: "Failed to parse 5min load".to_string() })?;
                let load_15 = loads[2].trim().replace(',', ".").parse::<f32>()
                    .map_err(|_| CollectorError::ParseError { message: "Failed to parse 15min load".to_string() })?;

                return Ok((load_1, load_5, load_15));
            }
        }

        Err(CollectorError::ParseError { message: "Failed to parse load averages".to_string() })
    }
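
For reference, the parser above targets the standard uptime output shape; a typical line (values illustrative) looks like:

    10:15:32 up 12 days,  3:04,  2 users,  load average: 0.52, 0.58, 0.59

Everything after "load average:" is split on ", " and parsed as the 1-, 5-, and 15-minute averages.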

    async fn get_cpu_temperature(&self) -> Option<f32> {
        // First pass: prefer the CPU package sensor over other thermal zones
        for i in 0..10 {
            let type_path = format!("/sys/class/thermal/thermal_zone{}/type", i);
            let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", i);

            if let (Ok(zone_type), Ok(temp_str)) = (
                fs::read_to_string(&type_path).await,
                fs::read_to_string(&temp_path).await,
            ) {
                let zone_type = zone_type.trim();
                if let Ok(temp_millic) = temp_str.trim().parse::<f32>() {
                    let temp_c = temp_millic / 1000.0; // sysfs reports millidegrees Celsius
                    if temp_c > 20.0 && temp_c < 150.0 {
                        if zone_type == "x86_pkg_temp" || zone_type.contains("coretemp") {
                            return Some(temp_c);
                        }
                    }
                }
            }
        }

        // Second pass: fall back to the first zone with a plausible reading
        for i in 0..10 {
            let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", i);
            if let Ok(temp_str) = fs::read_to_string(&temp_path).await {
                if let Ok(temp_millic) = temp_str.trim().parse::<f32>() {
                    let temp_c = temp_millic / 1000.0;
                    if temp_c > 20.0 && temp_c < 150.0 {
                        return Some(temp_c);
                    }
                }
            }
        }
        None
    }
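
The Linux sysfs thermal interface exposes one directory per zone, with the temperature reported in millidegrees Celsius; for example (zone index and values illustrative):

    /sys/class/thermal/thermal_zone0/type  →  x86_pkg_temp
    /sys/class/thermal/thermal_zone0/temp  →  45000   (i.e. 45.0 °C)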

    async fn get_memory_info(&self) -> Result<(f32, f32), CollectorError> {
        let meminfo = fs::read_to_string("/proc/meminfo")
            .await
            .map_err(|e| CollectorError::IoError { message: format!("Failed to read /proc/meminfo: {}", e) })?;

        let mut mem_total = None;
        let mut mem_available = None;

        for line in meminfo.lines() {
            if let Some(value_str) = line.strip_prefix("MemTotal:") {
                if let Some(kb_str) = value_str.trim().split_whitespace().next() {
                    mem_total = kb_str.parse::<f32>().ok().map(|kb| kb / 1024.0); // kB -> MB
                }
            } else if let Some(value_str) = line.strip_prefix("MemAvailable:") {
                if let Some(kb_str) = value_str.trim().split_whitespace().next() {
                    mem_available = kb_str.parse::<f32>().ok().map(|kb| kb / 1024.0);
                }
            }
        }

        match (mem_total, mem_available) {
            (Some(total), Some(available)) => {
                let used = total - available;
                Ok((used, total))
            }
            _ => Err(CollectorError::ParseError { message: "Failed to parse memory info".to_string() }),
        }
    }
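
/proc/meminfo reports values in kB; the two lines the parser reads look like (values illustrative):

    MemTotal:       16288916 kB
    MemAvailable:    9153112 kB

MemAvailable is the kernel's estimate of memory available to new workloads, so used = total − available tracks real memory pressure better than free memory alone.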

    fn determine_cpu_status(&self, load_1: f32) -> String {
        if load_1 >= 8.0 {
            "critical".to_string()
        } else if load_1 >= 5.0 {
            "warning".to_string()
        } else {
            "ok".to_string()
        }
    }

    fn determine_memory_status(&self, used_mb: f32, total_mb: f32) -> String {
        let usage_percent = (used_mb / total_mb) * 100.0;
        if usage_percent >= 95.0 {
            "critical".to_string()
        } else if usage_percent >= 80.0 {
            "warning".to_string()
        } else {
            "ok".to_string()
        }
    }

    fn determine_cpu_temp_status(&self, temp_c: f32) -> String {
        if temp_c >= 100.0 {
            "critical".to_string()
        } else if temp_c >= 85.0 { // assumed warning threshold; must sit below the 100.0 critical cutoff
            "warning".to_string()
        } else {
            "ok".to_string()
        }
    }
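
Taken together, the three helpers map raw readings onto the shared ok/warning/critical scale (the 85 °C warning threshold is the assumed value from the fix above):

    CPU load (1 min):   ok < 5.0,    warning >= 5.0,    critical >= 8.0
    Memory usage:       ok < 80%,    warning >= 80%,    critical >= 95%
    CPU temperature:    ok < 85 °C,  warning >= 85 °C,  critical >= 100 °C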

    async fn get_service_description_with_cache(&self, service: &str) -> Option<Vec<String>> {
        // Check if we should update the cache (throttled)
        let should_update = self.should_update_description(service).await;
@@ -676,7 +897,7 @@ impl ServiceCollector {
        let mut accessible_sites = Vec::new();
        for site in sites {
            if self.check_site_accessibility(&site).await {
                accessible_sites.push(site); // Checkmark removed - status is now shown via the sub-service row indicator
            }
        }

@@ -791,6 +1012,9 @@ impl Collector for ServiceCollector {
        let mut total_memory_quota = 0.0;
        let mut total_disk_used = 0.0;

        // Add system metrics as service entries first
        self.add_system_metrics_as_services(&mut services, &mut healthy, &mut degraded, &mut failed).await;

        // Collect data from all configured services
        for service in &self.services {
            match self.get_service_status(service).await {
@@ -807,7 +1031,39 @@ impl Collector for ServiceCollector {
                    }
                    total_disk_used += service_data.disk_used_gb;

                    // Handle nginx specially - create sub-services for sites
                    if service == "nginx" && matches!(service_data.status, ServiceStatus::Running) {
                        // Clear nginx description - sites become individual sub-services
                        let mut nginx_service = service_data;
                        nginx_service.description = None;
                        services.push(nginx_service);

                        // Add nginx sites as individual sub-services
                        if let Some(sites) = self.get_nginx_sites().await {
                            for (i, site) in sites.iter().enumerate() {
                                let site_name = if i == sites.len() - 1 {
                                    format!("└─ {}", site) // last entry closes the tree
                                } else {
                                    format!("├─ {}", site)
                                };

                                services.push(ServiceData {
                                    name: site_name,
                                    status: ServiceStatus::Running, // Assume sites are running if nginx is running
                                    memory_used_mb: 0.0,
                                    memory_quota_mb: 0.0,
                                    cpu_percent: 0.0,
                                    sandbox_limit: None,
                                    disk_used_gb: 0.0,
                                    description: None,
                                    sub_service: true,
                                });
                                healthy += 1;
                            }
                        }
                    } else {
                        services.push(service_data);
                    }
                }
                Err(e) => {
                    failed += 1;
@@ -821,6 +1077,7 @@ impl Collector for ServiceCollector {
                        sandbox_limit: None,
                        disk_used_gb: 0.0,
                        description: None,
                        sub_service: false,
                    });
                    tracing::warn!("Failed to collect metrics for service {}: {}", service, e);
                }
@@ -879,6 +1136,8 @@ struct ServiceData {
    disk_used_gb: f32,
    #[serde(skip_serializing_if = "Option::is_none")]
    description: Option<Vec<String>>,
    #[serde(default)]
    sub_service: bool,
}
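
With these attributes (assuming the struct carries the usual serde derives), description is omitted from the JSON when it is None, and sub_service falls back to false when absent from older payloads; an nginx sub-service row might serialize roughly as:

    {"name": "├─ example.com", "status": "running", "memory_used_mb": 0.0, "memory_quota_mb": 0.0, "cpu_percent": 0.0, "sandbox_limit": null, "disk_used_gb": 0.0, "sub_service": true}

(the exact status representation depends on how ServiceStatus implements Serialize).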

#[derive(Debug, Clone, Serialize)]