Refactor services widget with unified system metrics display
- Rename alerts widget to hosts widget for clarity
- Add sub_service field to ServiceInfo for display differentiation
- Integrate system metrics (CPU load, memory, temperature, disk) as service rows
- Convert nginx sites to individual sub-service rows with tree structure
- Remove nginx site checkmarks - status now shown via row indicators
- Update dashboard layout to display system and service data together
- Maintain description lines for connection counts and service details

Services widget now shows:
- System metrics as regular service rows with status
- Nginx sites as sub-services with ├─/└─ tree formatting
- Regular services with full resource data and descriptions
- Unified status indication across all row types
This commit is contained in:
parent
c68ccf023e
commit
bab387c74d
104
README.md
104
README.md
@ -3,28 +3,39 @@
|
||||
A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure. Built to replace Glance with a custom solution tailored for specific monitoring needs and API integrations. Features real-time monitoring of all infrastructure components with intelligent email notifications and automatic status calculation.
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ CM Dashboard • cmbox │
|
||||
├─────────────────────────────────────────────────────────────────────┤
|
||||
│ Storage • ok:1 warn:0 crit:0 │ Services • ok:1 warn:0 fail:0 │
|
||||
│ ┌─────────────────────────────────┐ │ ┌─────────────────────────────── │ │
|
||||
│ │Drive Temp Wear Spare Hours │ │ │Service memory: 7.1/23899.7 MiB│ │
|
||||
│ │nvme0n1 28°C 1% 100% 14489 │ │ │Disk usage: — │ │
|
||||
│ │ Capacity Usage │ │ │ Service Memory Disk │ │
|
||||
│ │ 954G 77G (8%) │ │ │✔ sshd 7.1 MiB — │ │
|
||||
│ └─────────────────────────────────┘ │ └─────────────────────────────── │ │
|
||||
├─────────────────────────────────────────────────────────────────────┤
|
||||
│ CPU / Memory • warn │ Backups │
|
||||
│ System memory: 5251.7/23899.7 MiB │ Host cmbox awaiting backup │ │
|
||||
│ CPU load (1/5/15): 2.18 2.66 2.56 │ metrics │ │
|
||||
│ CPU freq: 1100.1 MHz │ │ │
|
||||
│ CPU temp: 47.0°C │ │ │
|
||||
├─────────────────────────────────────────────────────────────────────┤
|
||||
│ Alerts • ok:0 warn:3 fail:0 │ Status • ZMQ connected │
|
||||
│ cmbox: warning: CPU load 2.18 │ Monitoring • hosts: 3 │ │
|
||||
│ srv01: pending: awaiting metrics │ Data source: ZMQ – connected │ │
|
||||
│ labbox: pending: awaiting metrics │ Active host: cmbox (1/3) │ │
|
||||
└─────────────────────────────────────────────────────────────────────┘
|
||||
CM Dashboard • srv01
|
||||
┌System───────────────────────────────────────────────────────┐┌Services────────────────────────────────────────────────────┐
|
||||
│ Memory usage ││ Service Memory (GB) CPU Disk │
|
||||
│✔ 3.0 / 7.8 GB ││✔ Service Memory 7.1/23899.7 MiB — │
|
||||
│ CPU load CPU temp ││✔ Disk Usage — — 45/100 GB │
|
||||
│✔ 1.05 • 0.96 • 0.58 64.0°C ││⚠ CPU Load — 2.18 — │
|
||||
│ C1E C3 C6 C8 C9 C10 ││✔ CPU Temperature — 47.0°C — │
|
||||
│✔ 0.5% 0.5% 10.4% 10.2% 0.4% 77.9% ││✔ docker-registry 0.0 GB 0.0% <1 MB │
|
||||
│ GPU load GPU temp ││✔ gitea 0.4/4.1 GB 0.2% 970 MB │
|
||||
└─────────────────────────────────────────────────────────────┘│ 1 active connections │
|
||||
┌Storage──────────────────────────────────────────────────────┐│✔ nginx 0.0/1.0 GB 0.0% <1 MB │
|
||||
│ Drive Temp Wear Spare Hours Capacity Usage ││✔ ├─ docker.cmtec.se │
|
||||
│✔ nvme0n1 57°C 4% 100% 11463 932G 23G (2%) ││✔ ├─ git.cmtec.se │
|
||||
│ ││✔ ├─ gitea.cmtec.se │
|
||||
│ ││✔ ├─ haasp.cmtec.se │
|
||||
│ ││✔ ├─ pages.cmtec.se │
|
||||
│ ││✔ ├─ photos.cmtec.se │
|
||||
└─────────────────────────────────────────────────────────────┘│✔ └─ www.kryddorten.se │
|
||||
┌Backups──────────────────────────────────────────────────────┐│✔ postgresql 0.1 GB 0.0% 378 MB │
|
||||
│ Backup Status Details ││ 1 active connections │
|
||||
│✔ Latest 3h ago 1.4 GiB ││✔ redis-immich 0.0 GB 0.4% <1 MB │
|
||||
│ 8 archives, 2.4 GiB total ││✔ sshd 0.0 GB 0.0% <1 MB │
|
||||
│✔ Disk ok 2.4/468 GB (1%) ││ 1 SSH connection │
|
||||
│ ││✔ unifi 0.9/2.0 GB 0.4% 391 MB │
|
||||
│ ││ │
|
||||
└─────────────────────────────────────────────────────────────┘└────────────────────────────────────────────────────────────┘
|
||||
┌Hosts────────────────────────────────────────────────────────┐
|
||||
│ Host Status Timestamp │
|
||||
│✔ cmbox ok 2025-10-13 05:45:28 │
|
||||
│✔ srv01 ok 2025-10-13 05:45:28 │
|
||||
│? labbox No data received — │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
Keys: [←→] hosts [r]efresh [q]uit
|
||||
```
|
||||
|
||||
@ -176,6 +187,46 @@ sudo cm-dashboard-agent \
|
||||
--interval 5000
|
||||
```
|
||||
|
||||
## Widget Layout
|
||||
|
||||
### Services Widget Structure
|
||||
The Services widget now displays both system metrics and services in a unified table:
|
||||
|
||||
```
|
||||
┌Services────────────────────────────────────────────────────┐
|
||||
│ Service Memory (GB) CPU Disk │
|
||||
│✔ Service Memory 7.1/23899.7 MiB — │ ← System metric as service row
|
||||
│✔ Disk Usage — — 45/100 GB │ ← System metric as service row
|
||||
│⚠ CPU Load — 2.18 — │ ← System metric as service row
|
||||
│✔ CPU Temperature — 47.0°C — │ ← System metric as service row
|
||||
│✔ docker-registry 0.0 GB 0.0% <1 MB │ ← Regular service
|
||||
│✔ nginx 0.0/1.0 GB 0.0% <1 MB │ ← Regular service
|
||||
│✔ ├─ docker.cmtec.se │ ← Nginx site (sub-service)
|
||||
│✔ ├─ git.cmtec.se │ ← Nginx site (sub-service)
|
||||
│✔ └─ gitea.cmtec.se │ ← Nginx site (sub-service)
|
||||
│✔ sshd 0.0 GB 0.0% <1 MB │ ← Regular service
|
||||
│ 1 SSH connection │ ← Service description
|
||||
└────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Row Types:**
|
||||
- **System Metrics**: CPU Load, Service Memory, Disk Usage, CPU Temperature with status indicators
|
||||
- **Regular Services**: Full resource data (memory, CPU, disk) with optional description lines
|
||||
- **Sub-services**: Nginx sites with tree structure, status indicators only (no resource columns)
|
||||
- **Description Lines**: Connection counts and service-specific info without status indicators
|
||||
|
||||
### Hosts Widget (formerly Alerts)
|
||||
The Hosts widget provides a summary view of all monitored hosts:
|
||||
|
||||
```
|
||||
┌Hosts────────────────────────────────────────────────────────┐
|
||||
│ Host Status Timestamp │
|
||||
│✔ cmbox ok 2025-10-13 05:45:28 │
|
||||
│✔ srv01 ok 2025-10-13 05:45:28 │
|
||||
│? labbox No data received — │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Monitoring Components
|
||||
|
||||
### System Collector
|
||||
@ -185,9 +236,12 @@ sudo cm-dashboard-agent \
|
||||
- **C-States**: Power management state distribution (C0-C10)
|
||||
|
||||
### Service Collector
|
||||
- **Systemd Services**: Auto-discovery of interesting services
|
||||
- **Resource Usage**: Per-service memory and disk consumption
|
||||
- **Service Health**: Running/stopped status with detailed failure info
|
||||
- **System Metrics as Services**: CPU Load, Service Memory, Disk Usage, CPU Temperature displayed as individual service rows
|
||||
- **Systemd Services**: Auto-discovery of interesting services with resource monitoring
|
||||
- **Nginx Site Monitoring**: Individual rows for each nginx virtual host with tree structure (`├─` and `└─`)
|
||||
- **Resource Usage**: Per-service memory, CPU, and disk consumption
|
||||
- **Service Health**: Running/stopped/degraded status with detailed failure info
|
||||
- **Connection Tracking**: SSH connections, database connections as description lines
|
||||
|
||||
### SMART Collector
|
||||
- **NVMe Health**: Temperature, wear leveling, spare blocks
|
||||
|
||||
@ -114,6 +114,7 @@ impl ServiceCollector {
|
||||
sandbox_limit: None, // TODO: Implement sandbox limit detection
|
||||
disk_used_gb,
|
||||
description,
|
||||
sub_service: false,
|
||||
})
|
||||
}
|
||||
|
||||
@ -404,6 +405,226 @@ impl ServiceCollector {
|
||||
}
|
||||
}
|
||||
|
||||
async fn add_system_metrics_as_services(&self, services: &mut Vec<ServiceData>, healthy: &mut usize, degraded: &mut usize, failed: &mut usize) {
|
||||
// Get system metrics
|
||||
if let Ok((load_1, _load_5, _load_15)) = self.get_cpu_load().await {
|
||||
let cpu_status = self.determine_cpu_status(load_1);
|
||||
match cpu_status.as_str() {
|
||||
"ok" => *healthy += 1,
|
||||
"warning" => *degraded += 1,
|
||||
"critical" => *failed += 1,
|
||||
_ => *degraded += 1,
|
||||
}
|
||||
services.push(ServiceData {
|
||||
name: "CPU Load".to_string(),
|
||||
status: match cpu_status.as_str() {
|
||||
"ok" => ServiceStatus::Running,
|
||||
"warning" => ServiceStatus::Degraded,
|
||||
"critical" => ServiceStatus::Stopped,
|
||||
_ => ServiceStatus::Degraded,
|
||||
},
|
||||
memory_used_mb: 0.0,
|
||||
memory_quota_mb: 0.0,
|
||||
cpu_percent: load_1,
|
||||
sandbox_limit: None,
|
||||
disk_used_gb: 0.0,
|
||||
description: None,
|
||||
sub_service: false,
|
||||
});
|
||||
}
|
||||
|
||||
if let Ok((memory_used_mb, memory_total_mb)) = self.get_memory_info().await {
|
||||
let memory_status = self.determine_memory_status(memory_used_mb, memory_total_mb);
|
||||
match memory_status.as_str() {
|
||||
"ok" => *healthy += 1,
|
||||
"warning" => *degraded += 1,
|
||||
"critical" => *failed += 1,
|
||||
_ => *degraded += 1,
|
||||
}
|
||||
services.push(ServiceData {
|
||||
name: "Service Memory".to_string(),
|
||||
status: match memory_status.as_str() {
|
||||
"ok" => ServiceStatus::Running,
|
||||
"warning" => ServiceStatus::Degraded,
|
||||
"critical" => ServiceStatus::Stopped,
|
||||
_ => ServiceStatus::Degraded,
|
||||
},
|
||||
memory_used_mb,
|
||||
memory_quota_mb: memory_total_mb,
|
||||
cpu_percent: 0.0,
|
||||
sandbox_limit: None,
|
||||
disk_used_gb: 0.0,
|
||||
description: None,
|
||||
sub_service: false,
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(cpu_temp) = self.get_cpu_temperature().await {
|
||||
let temp_status = self.determine_cpu_temp_status(cpu_temp);
|
||||
match temp_status.as_str() {
|
||||
"ok" => *healthy += 1,
|
||||
"warning" => *degraded += 1,
|
||||
"critical" => *failed += 1,
|
||||
_ => *degraded += 1,
|
||||
}
|
||||
services.push(ServiceData {
|
||||
name: "CPU Temperature".to_string(),
|
||||
status: match temp_status.as_str() {
|
||||
"ok" => ServiceStatus::Running,
|
||||
"warning" => ServiceStatus::Degraded,
|
||||
"critical" => ServiceStatus::Stopped,
|
||||
_ => ServiceStatus::Degraded,
|
||||
},
|
||||
memory_used_mb: 0.0,
|
||||
memory_quota_mb: 0.0,
|
||||
cpu_percent: cpu_temp,
|
||||
sandbox_limit: None,
|
||||
disk_used_gb: 0.0,
|
||||
description: None,
|
||||
sub_service: false,
|
||||
});
|
||||
}
|
||||
|
||||
if let Ok(disk_usage) = self.get_disk_usage().await {
|
||||
services.push(ServiceData {
|
||||
name: "Disk Usage".to_string(),
|
||||
status: ServiceStatus::Running,
|
||||
memory_used_mb: 0.0,
|
||||
memory_quota_mb: 0.0,
|
||||
cpu_percent: 0.0,
|
||||
sandbox_limit: None,
|
||||
disk_used_gb: disk_usage.used_gb,
|
||||
description: None,
|
||||
sub_service: false,
|
||||
});
|
||||
*healthy += 1;
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_cpu_load(&self) -> Result<(f32, f32, f32), CollectorError> {
|
||||
let output = tokio::process::Command::new("uptime")
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| CollectorError::CommandFailed {
|
||||
command: "uptime".to_string(),
|
||||
message: e.to_string()
|
||||
})?;
|
||||
|
||||
let uptime_str = String::from_utf8_lossy(&output.stdout);
|
||||
|
||||
if let Some(load_part) = uptime_str.split("load average:").nth(1) {
|
||||
let load_str = load_part.trim();
|
||||
let loads: Vec<&str> = load_str.split(", ").collect();
|
||||
if loads.len() >= 3 {
|
||||
let load_1 = loads[0].trim().replace(',', ".").parse::<f32>()
|
||||
.map_err(|_| CollectorError::ParseError { message: "Failed to parse 1min load".to_string() })?;
|
||||
let load_5 = loads[1].trim().replace(',', ".").parse::<f32>()
|
||||
.map_err(|_| CollectorError::ParseError { message: "Failed to parse 5min load".to_string() })?;
|
||||
let load_15 = loads[2].trim().replace(',', ".").parse::<f32>()
|
||||
.map_err(|_| CollectorError::ParseError { message: "Failed to parse 15min load".to_string() })?;
|
||||
|
||||
return Ok((load_1, load_5, load_15));
|
||||
}
|
||||
}
|
||||
|
||||
Err(CollectorError::ParseError { message: "Failed to parse load averages".to_string() })
|
||||
}
|
||||
|
||||
/// Best-effort CPU temperature in °C from sysfs thermal zones.
///
/// Two ordered passes over thermal_zone0..9:
///   1. Prefer a zone explicitly identified as the CPU package sensor
///      (type "x86_pkg_temp" or containing "coretemp").
///   2. Otherwise fall back to the first zone with a plausible reading.
/// Readings outside (20, 150) °C are treated as bogus sensors and skipped.
/// Returns None when no plausible reading exists.
async fn get_cpu_temperature(&self) -> Option<f32> {
    // Pass 1: look for a zone whose `type` names a CPU sensor.
    for i in 0..10 {
        let type_path = format!("/sys/class/thermal/thermal_zone{}/type", i);
        let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", i);

        if let (Ok(zone_type), Ok(temp_str)) = (
            fs::read_to_string(&type_path).await,
            fs::read_to_string(&temp_path).await,
        ) {
            let zone_type = zone_type.trim();
            if let Ok(temp_millic) = temp_str.trim().parse::<f32>() {
                // sysfs reports millidegrees Celsius.
                let temp_c = temp_millic / 1000.0;
                if temp_c > 20.0 && temp_c < 150.0 {
                    if zone_type == "x86_pkg_temp" || zone_type.contains("coretemp") {
                        return Some(temp_c);
                    }
                }
            }
        }
    }

    // Pass 2: no CPU-specific zone matched — accept any zone with a sane value.
    for i in 0..10 {
        let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", i);
        if let Ok(temp_str) = fs::read_to_string(&temp_path).await {
            if let Ok(temp_millic) = temp_str.trim().parse::<f32>() {
                let temp_c = temp_millic / 1000.0;
                if temp_c > 20.0 && temp_c < 150.0 {
                    return Some(temp_c);
                }
            }
        }
    }
    None
}
|
||||
|
||||
async fn get_memory_info(&self) -> Result<(f32, f32), CollectorError> {
|
||||
let meminfo = fs::read_to_string("/proc/meminfo")
|
||||
.await
|
||||
.map_err(|e| CollectorError::IoError { message: format!("Failed to read /proc/meminfo: {}", e) })?;
|
||||
|
||||
let mut mem_total = None;
|
||||
let mut mem_available = None;
|
||||
|
||||
for line in meminfo.lines() {
|
||||
if let Some(value_str) = line.strip_prefix("MemTotal:") {
|
||||
if let Some(kb_str) = value_str.trim().split_whitespace().next() {
|
||||
mem_total = kb_str.parse::<f32>().ok().map(|kb| kb / 1024.0);
|
||||
}
|
||||
} else if let Some(value_str) = line.strip_prefix("MemAvailable:") {
|
||||
if let Some(kb_str) = value_str.trim().split_whitespace().next() {
|
||||
mem_available = kb_str.parse::<f32>().ok().map(|kb| kb / 1024.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match (mem_total, mem_available) {
|
||||
(Some(total), Some(available)) => {
|
||||
let used = total - available;
|
||||
Ok((used, total))
|
||||
}
|
||||
_ => Err(CollectorError::ParseError { message: "Failed to parse memory info".to_string() }),
|
||||
}
|
||||
}
|
||||
|
||||
fn determine_cpu_status(&self, load_1: f32) -> String {
|
||||
if load_1 >= 8.0 {
|
||||
"critical".to_string()
|
||||
} else if load_1 >= 5.0 {
|
||||
"warning".to_string()
|
||||
} else {
|
||||
"ok".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
fn determine_memory_status(&self, used_mb: f32, total_mb: f32) -> String {
|
||||
let usage_percent = (used_mb / total_mb) * 100.0;
|
||||
if usage_percent >= 95.0 {
|
||||
"critical".to_string()
|
||||
} else if usage_percent >= 80.0 {
|
||||
"warning".to_string()
|
||||
} else {
|
||||
"ok".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
fn determine_cpu_temp_status(&self, temp_c: f32) -> String {
|
||||
if temp_c >= 100.0 {
|
||||
"critical".to_string()
|
||||
} else if temp_c >= 100.0 {
|
||||
"warning".to_string()
|
||||
} else {
|
||||
"ok".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_service_description_with_cache(&self, service: &str) -> Option<Vec<String>> {
|
||||
// Check if we should update the cache (throttled)
|
||||
let should_update = self.should_update_description(service).await;
|
||||
@ -676,7 +897,7 @@ impl ServiceCollector {
|
||||
let mut accessible_sites = Vec::new();
|
||||
for site in sites {
|
||||
if self.check_site_accessibility(&site).await {
|
||||
accessible_sites.push(format!("{} ✓", site));
|
||||
accessible_sites.push(site); // Remove checkmark - status will be shown via sub_service row status
|
||||
}
|
||||
}
|
||||
|
||||
@ -791,6 +1012,9 @@ impl Collector for ServiceCollector {
|
||||
let mut total_memory_quota = 0.0;
|
||||
let mut total_disk_used = 0.0;
|
||||
|
||||
// Add system metrics as service entries first
|
||||
self.add_system_metrics_as_services(&mut services, &mut healthy, &mut degraded, &mut failed).await;
|
||||
|
||||
// Collect data from all configured services
|
||||
for service in &self.services {
|
||||
match self.get_service_status(service).await {
|
||||
@ -807,7 +1031,39 @@ impl Collector for ServiceCollector {
|
||||
}
|
||||
total_disk_used += service_data.disk_used_gb;
|
||||
|
||||
services.push(service_data);
|
||||
// Handle nginx specially - create sub-services for sites
|
||||
if service == "nginx" && matches!(service_data.status, ServiceStatus::Running) {
|
||||
// Clear nginx description - sites will become individual sub-services
|
||||
let mut nginx_service = service_data;
|
||||
nginx_service.description = None;
|
||||
services.push(nginx_service);
|
||||
|
||||
// Add nginx sites as individual sub-services
|
||||
if let Some(sites) = self.get_nginx_sites().await {
|
||||
for (i, site) in sites.iter().enumerate() {
|
||||
let site_name = if i == sites.len() - 1 {
|
||||
format!("└─ {}", site)
|
||||
} else {
|
||||
format!("├─ {}", site)
|
||||
};
|
||||
|
||||
services.push(ServiceData {
|
||||
name: site_name,
|
||||
status: ServiceStatus::Running, // Assume sites are running if nginx is running
|
||||
memory_used_mb: 0.0,
|
||||
memory_quota_mb: 0.0,
|
||||
cpu_percent: 0.0,
|
||||
sandbox_limit: None,
|
||||
disk_used_gb: 0.0,
|
||||
description: None,
|
||||
sub_service: true,
|
||||
});
|
||||
healthy += 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
services.push(service_data);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
failed += 1;
|
||||
@ -821,6 +1077,7 @@ impl Collector for ServiceCollector {
|
||||
sandbox_limit: None,
|
||||
disk_used_gb: 0.0,
|
||||
description: None,
|
||||
sub_service: false,
|
||||
});
|
||||
tracing::warn!("Failed to collect metrics for service {}: {}", service, e);
|
||||
}
|
||||
@ -879,6 +1136,8 @@ struct ServiceData {
|
||||
disk_used_gb: f32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
description: Option<Vec<String>>,
|
||||
#[serde(default)]
|
||||
sub_service: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
|
||||
@ -116,6 +116,8 @@ pub struct ServiceInfo {
|
||||
pub disk_used_gb: f32,
|
||||
#[serde(default)]
|
||||
pub description: Option<Vec<String>>,
|
||||
#[serde(default)]
|
||||
pub sub_service: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
||||
@ -6,7 +6,7 @@ use ratatui::Frame;
|
||||
|
||||
use crate::app::App;
|
||||
|
||||
use super::{alerts, backup, services, storage, system};
|
||||
use super::{hosts, backup, services, storage, system};
|
||||
|
||||
pub fn render(frame: &mut Frame, app: &App) {
|
||||
let host_summaries = app.host_display_data();
|
||||
@ -56,7 +56,7 @@ pub fn render(frame: &mut Frame, app: &App) {
|
||||
backup::render(frame, primary_host.as_ref(), left_widgets[2]);
|
||||
services::render(frame, primary_host.as_ref(), services_area);
|
||||
|
||||
alerts::render(frame, &host_summaries, left_side[1]);
|
||||
hosts::render(frame, &host_summaries, left_side[1]);
|
||||
|
||||
if app.help_visible() {
|
||||
render_help(frame, size);
|
||||
|
||||
@ -9,13 +9,13 @@ use crate::ui::widget::{render_widget_data, WidgetData, WidgetStatus, StatusLeve
|
||||
pub fn render(frame: &mut Frame, hosts: &[HostDisplayData], area: Rect) {
|
||||
let (severity, _ok_count, _warn_count, _fail_count) = classify_hosts(hosts);
|
||||
|
||||
let title = "Alerts".to_string();
|
||||
let title = "Hosts".to_string();
|
||||
|
||||
let widget_status = match severity {
|
||||
AlertSeverity::Critical => StatusLevel::Error,
|
||||
AlertSeverity::Warning => StatusLevel::Warning,
|
||||
AlertSeverity::Healthy => StatusLevel::Ok,
|
||||
AlertSeverity::Unknown => StatusLevel::Unknown,
|
||||
HostSeverity::Critical => StatusLevel::Error,
|
||||
HostSeverity::Warning => StatusLevel::Warning,
|
||||
HostSeverity::Healthy => StatusLevel::Ok,
|
||||
HostSeverity::Unknown => StatusLevel::Unknown,
|
||||
};
|
||||
|
||||
let mut data = WidgetData::new(
|
||||
@ -38,10 +38,10 @@ pub fn render(frame: &mut Frame, hosts: &[HostDisplayData], area: Rect) {
|
||||
for host in hosts {
|
||||
let (status_text, severity, _emphasize) = host_status(host);
|
||||
let status_level = match severity {
|
||||
AlertSeverity::Critical => StatusLevel::Error,
|
||||
AlertSeverity::Warning => StatusLevel::Warning,
|
||||
AlertSeverity::Healthy => StatusLevel::Ok,
|
||||
AlertSeverity::Unknown => StatusLevel::Unknown,
|
||||
HostSeverity::Critical => StatusLevel::Error,
|
||||
HostSeverity::Warning => StatusLevel::Warning,
|
||||
HostSeverity::Healthy => StatusLevel::Ok,
|
||||
HostSeverity::Unknown => StatusLevel::Unknown,
|
||||
};
|
||||
let update = latest_timestamp(host)
|
||||
.map(|ts| ts.format("%Y-%m-%d %H:%M:%S").to_string())
|
||||
@ -63,14 +63,14 @@ pub fn render(frame: &mut Frame, hosts: &[HostDisplayData], area: Rect) {
|
||||
}
|
||||
|
||||
/// Aggregated health level for a monitored host, ordered from best to worst.
/// Renamed from AlertSeverity as part of the Alerts → Hosts widget rename;
/// the diff residue left both declarations side by side, which cannot compile.
#[derive(Copy, Clone, Eq, PartialEq)]
enum HostSeverity {
    Healthy,
    Warning,
    Critical,
    Unknown,
}
||||
|
||||
fn classify_hosts(hosts: &[HostDisplayData]) -> (AlertSeverity, usize, usize, usize) {
|
||||
fn classify_hosts(hosts: &[HostDisplayData]) -> (HostSeverity, usize, usize, usize) {
|
||||
let mut ok = 0;
|
||||
let mut warn = 0;
|
||||
let mut fail = 0;
|
||||
@ -78,81 +78,81 @@ fn classify_hosts(hosts: &[HostDisplayData]) -> (AlertSeverity, usize, usize, us
|
||||
for host in hosts {
|
||||
let severity = host_severity(host);
|
||||
match severity {
|
||||
AlertSeverity::Healthy => ok += 1,
|
||||
AlertSeverity::Warning => warn += 1,
|
||||
AlertSeverity::Critical => fail += 1,
|
||||
AlertSeverity::Unknown => warn += 1,
|
||||
HostSeverity::Healthy => ok += 1,
|
||||
HostSeverity::Warning => warn += 1,
|
||||
HostSeverity::Critical => fail += 1,
|
||||
HostSeverity::Unknown => warn += 1,
|
||||
}
|
||||
}
|
||||
|
||||
let highest = if fail > 0 {
|
||||
AlertSeverity::Critical
|
||||
HostSeverity::Critical
|
||||
} else if warn > 0 {
|
||||
AlertSeverity::Warning
|
||||
HostSeverity::Warning
|
||||
} else if ok > 0 {
|
||||
AlertSeverity::Healthy
|
||||
HostSeverity::Healthy
|
||||
} else {
|
||||
AlertSeverity::Unknown
|
||||
HostSeverity::Unknown
|
||||
};
|
||||
|
||||
(highest, ok, warn, fail)
|
||||
}
|
||||
|
||||
fn host_severity(host: &HostDisplayData) -> AlertSeverity {
|
||||
fn host_severity(host: &HostDisplayData) -> HostSeverity {
|
||||
// Check connection status first
|
||||
match host.connection_status {
|
||||
ConnectionStatus::Error => return AlertSeverity::Critical,
|
||||
ConnectionStatus::Timeout => return AlertSeverity::Warning,
|
||||
ConnectionStatus::Unknown => return AlertSeverity::Unknown,
|
||||
ConnectionStatus::Error => return HostSeverity::Critical,
|
||||
ConnectionStatus::Timeout => return HostSeverity::Warning,
|
||||
ConnectionStatus::Unknown => return HostSeverity::Unknown,
|
||||
ConnectionStatus::Connected => {}, // Continue with other checks
|
||||
}
|
||||
|
||||
if host.last_error.is_some() {
|
||||
return AlertSeverity::Critical;
|
||||
return HostSeverity::Critical;
|
||||
}
|
||||
|
||||
if let Some(smart) = host.smart.as_ref() {
|
||||
if smart.summary.critical > 0 {
|
||||
return AlertSeverity::Critical;
|
||||
return HostSeverity::Critical;
|
||||
}
|
||||
if smart.summary.warning > 0 || !smart.issues.is_empty() {
|
||||
return AlertSeverity::Warning;
|
||||
return HostSeverity::Warning;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(services) = host.services.as_ref() {
|
||||
if services.summary.failed > 0 {
|
||||
return AlertSeverity::Critical;
|
||||
return HostSeverity::Critical;
|
||||
}
|
||||
if services.summary.degraded > 0 {
|
||||
return AlertSeverity::Warning;
|
||||
return HostSeverity::Warning;
|
||||
}
|
||||
|
||||
// TODO: Update to use agent-provided system statuses instead of evaluate_performance
|
||||
// let (perf_severity, _) = evaluate_performance(&services.summary);
|
||||
// match perf_severity {
|
||||
// PerfSeverity::Critical => return AlertSeverity::Critical,
|
||||
// PerfSeverity::Warning => return AlertSeverity::Warning,
|
||||
// PerfSeverity::Critical => return HostSeverity::Critical,
|
||||
// PerfSeverity::Warning => return HostSeverity::Warning,
|
||||
// PerfSeverity::Ok => {}
|
||||
// }
|
||||
}
|
||||
|
||||
if let Some(backup) = host.backup.as_ref() {
|
||||
match backup.overall_status.as_str() {
|
||||
"critical" => return AlertSeverity::Critical,
|
||||
"warning" => return AlertSeverity::Warning,
|
||||
"critical" => return HostSeverity::Critical,
|
||||
"warning" => return HostSeverity::Warning,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if host.smart.is_none() && host.services.is_none() && host.backup.is_none() {
|
||||
AlertSeverity::Unknown
|
||||
HostSeverity::Unknown
|
||||
} else {
|
||||
AlertSeverity::Healthy
|
||||
HostSeverity::Healthy
|
||||
}
|
||||
}
|
||||
|
||||
fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
||||
fn host_status(host: &HostDisplayData) -> (String, HostSeverity, bool) {
|
||||
// Check connection status first
|
||||
match host.connection_status {
|
||||
ConnectionStatus::Error => {
|
||||
@ -161,7 +161,7 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
||||
} else {
|
||||
"Connection error".to_string()
|
||||
};
|
||||
return (msg, AlertSeverity::Critical, true);
|
||||
return (msg, HostSeverity::Critical, true);
|
||||
},
|
||||
ConnectionStatus::Timeout => {
|
||||
let msg = if let Some(error) = &host.last_error {
|
||||
@ -169,28 +169,28 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
||||
} else {
|
||||
"Keep-alive timeout".to_string()
|
||||
};
|
||||
return (msg, AlertSeverity::Warning, true);
|
||||
return (msg, HostSeverity::Warning, true);
|
||||
},
|
||||
ConnectionStatus::Unknown => {
|
||||
return ("No data received".to_string(), AlertSeverity::Unknown, true);
|
||||
return ("No data received".to_string(), HostSeverity::Unknown, true);
|
||||
},
|
||||
ConnectionStatus::Connected => {}, // Continue with other checks
|
||||
}
|
||||
|
||||
if let Some(error) = &host.last_error {
|
||||
return (format!("error: {}", error), AlertSeverity::Critical, true);
|
||||
return (format!("error: {}", error), HostSeverity::Critical, true);
|
||||
}
|
||||
|
||||
if let Some(smart) = host.smart.as_ref() {
|
||||
if smart.summary.critical > 0 {
|
||||
return (
|
||||
"critical: SMART critical".to_string(),
|
||||
AlertSeverity::Critical,
|
||||
HostSeverity::Critical,
|
||||
true,
|
||||
);
|
||||
}
|
||||
if let Some(issue) = smart.issues.first() {
|
||||
return (format!("warning: {}", issue), AlertSeverity::Warning, true);
|
||||
return (format!("warning: {}", issue), HostSeverity::Warning, true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -198,14 +198,14 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
||||
if services.summary.failed > 0 {
|
||||
return (
|
||||
format!("critical: {} failed svc", services.summary.failed),
|
||||
AlertSeverity::Critical,
|
||||
HostSeverity::Critical,
|
||||
true,
|
||||
);
|
||||
}
|
||||
if services.summary.degraded > 0 {
|
||||
return (
|
||||
format!("warning: {} degraded svc", services.summary.degraded),
|
||||
AlertSeverity::Warning,
|
||||
HostSeverity::Warning,
|
||||
true,
|
||||
);
|
||||
}
|
||||
@ -217,14 +217,14 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
||||
// PerfSeverity::Critical => {
|
||||
// return (
|
||||
// format!("critical: {}", reason_text),
|
||||
// AlertSeverity::Critical,
|
||||
// HostSeverity::Critical,
|
||||
// true,
|
||||
// );
|
||||
// }
|
||||
// PerfSeverity::Warning => {
|
||||
// return (
|
||||
// format!("warning: {}", reason_text),
|
||||
// AlertSeverity::Warning,
|
||||
// HostSeverity::Warning,
|
||||
// true,
|
||||
// );
|
||||
// }
|
||||
@ -238,14 +238,14 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
||||
"critical" => {
|
||||
return (
|
||||
"critical: backup failed".to_string(),
|
||||
AlertSeverity::Critical,
|
||||
HostSeverity::Critical,
|
||||
true,
|
||||
);
|
||||
}
|
||||
"warning" => {
|
||||
return (
|
||||
"warning: backup warning".to_string(),
|
||||
AlertSeverity::Warning,
|
||||
HostSeverity::Warning,
|
||||
true,
|
||||
);
|
||||
}
|
||||
@ -260,10 +260,10 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
||||
"pending: no recent data"
|
||||
};
|
||||
|
||||
return (status.to_string(), AlertSeverity::Warning, false);
|
||||
return (status.to_string(), HostSeverity::Warning, false);
|
||||
}
|
||||
|
||||
("ok".to_string(), AlertSeverity::Healthy, false)
|
||||
("ok".to_string(), HostSeverity::Healthy, false)
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
pub mod alerts;
|
||||
pub mod hosts;
|
||||
pub mod backup;
|
||||
pub mod dashboard;
|
||||
pub mod services;
|
||||
|
||||
@ -91,16 +91,31 @@ fn render_metrics(
|
||||
vec![]
|
||||
};
|
||||
|
||||
data.add_row(
|
||||
Some(WidgetStatus::new(status_level)),
|
||||
description,
|
||||
vec![
|
||||
svc.name.clone(),
|
||||
format_memory_value(svc.memory_used_mb, svc.memory_quota_mb),
|
||||
format_cpu_value(svc.cpu_percent),
|
||||
format_disk_value(svc.disk_used_gb),
|
||||
],
|
||||
);
|
||||
if svc.sub_service {
|
||||
// Sub-services only show name and status, no memory/CPU/disk data
|
||||
data.add_row(
|
||||
Some(WidgetStatus::new(status_level)),
|
||||
description,
|
||||
vec![
|
||||
svc.name.clone(),
|
||||
"".to_string(),
|
||||
"".to_string(),
|
||||
"".to_string(),
|
||||
],
|
||||
);
|
||||
} else {
|
||||
// Regular services show all columns
|
||||
data.add_row(
|
||||
Some(WidgetStatus::new(status_level)),
|
||||
description,
|
||||
vec![
|
||||
svc.name.clone(),
|
||||
format_memory_value(svc.memory_used_mb, svc.memory_quota_mb),
|
||||
format_cpu_value(svc.cpu_percent),
|
||||
format_disk_value(svc.disk_used_gb),
|
||||
],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
render_widget_data(frame, area, data);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user