Refactor services widget with unified system metrics display
- Rename alerts widget to hosts widget for clarity
- Add sub_service field to ServiceInfo for display differentiation
- Integrate system metrics (CPU load, memory, temperature, disk) as service rows
- Convert nginx sites to individual sub-service rows with tree structure
- Remove nginx site checkmarks - status now shown via row indicators
- Update dashboard layout to display system and service data together
- Maintain description lines for connection counts and service details

Services widget now shows:
- System metrics as regular service rows with status
- Nginx sites as sub-services with ├─/└─ tree formatting
- Regular services with full resource data and descriptions
- Unified status indication across all row types
This commit is contained in:
parent
c68ccf023e
commit
bab387c74d
104
README.md
104
README.md
@ -3,28 +3,39 @@
|
|||||||
A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure. Built to replace Glance with a custom solution tailored for specific monitoring needs and API integrations. Features real-time monitoring of all infrastructure components with intelligent email notifications and automatic status calculation.
|
A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure. Built to replace Glance with a custom solution tailored for specific monitoring needs and API integrations. Features real-time monitoring of all infrastructure components with intelligent email notifications and automatic status calculation.
|
||||||
|
|
||||||
```
|
```
|
||||||
┌─────────────────────────────────────────────────────────────────────┐
|
CM Dashboard • srv01
|
||||||
│ CM Dashboard • cmbox │
|
┌System───────────────────────────────────────────────────────┐┌Services────────────────────────────────────────────────────┐
|
||||||
├─────────────────────────────────────────────────────────────────────┤
|
│ Memory usage ││ Service Memory (GB) CPU Disk │
|
||||||
│ Storage • ok:1 warn:0 crit:0 │ Services • ok:1 warn:0 fail:0 │
|
│✔ 3.0 / 7.8 GB ││✔ Service Memory 7.1/23899.7 MiB — │
|
||||||
│ ┌─────────────────────────────────┐ │ ┌─────────────────────────────── │ │
|
│ CPU load CPU temp ││✔ Disk Usage — — 45/100 GB │
|
||||||
│ │Drive Temp Wear Spare Hours │ │ │Service memory: 7.1/23899.7 MiB│ │
|
│✔ 1.05 • 0.96 • 0.58 64.0°C ││⚠ CPU Load — 2.18 — │
|
||||||
│ │nvme0n1 28°C 1% 100% 14489 │ │ │Disk usage: — │ │
|
│ C1E C3 C6 C8 C9 C10 ││✔ CPU Temperature — 47.0°C — │
|
||||||
│ │ Capacity Usage │ │ │ Service Memory Disk │ │
|
│✔ 0.5% 0.5% 10.4% 10.2% 0.4% 77.9% ││✔ docker-registry 0.0 GB 0.0% <1 MB │
|
||||||
│ │ 954G 77G (8%) │ │ │✔ sshd 7.1 MiB — │ │
|
│ GPU load GPU temp ││✔ gitea 0.4/4.1 GB 0.2% 970 MB │
|
||||||
│ └─────────────────────────────────┘ │ └─────────────────────────────── │ │
|
└─────────────────────────────────────────────────────────────┘│ 1 active connections │
|
||||||
├─────────────────────────────────────────────────────────────────────┤
|
┌Storage──────────────────────────────────────────────────────┐│✔ nginx 0.0/1.0 GB 0.0% <1 MB │
|
||||||
│ CPU / Memory • warn │ Backups │
|
│ Drive Temp Wear Spare Hours Capacity Usage ││✔ ├─ docker.cmtec.se │
|
||||||
│ System memory: 5251.7/23899.7 MiB │ Host cmbox awaiting backup │ │
|
│✔ nvme0n1 57°C 4% 100% 11463 932G 23G (2%) ││✔ ├─ git.cmtec.se │
|
||||||
│ CPU load (1/5/15): 2.18 2.66 2.56 │ metrics │ │
|
│ ││✔ ├─ gitea.cmtec.se │
|
||||||
│ CPU freq: 1100.1 MHz │ │ │
|
│ ││✔ ├─ haasp.cmtec.se │
|
||||||
│ CPU temp: 47.0°C │ │ │
|
│ ││✔ ├─ pages.cmtec.se │
|
||||||
├─────────────────────────────────────────────────────────────────────┤
|
│ ││✔ ├─ photos.cmtec.se │
|
||||||
│ Alerts • ok:0 warn:3 fail:0 │ Status • ZMQ connected │
|
└─────────────────────────────────────────────────────────────┘│✔ └─ www.kryddorten.se │
|
||||||
│ cmbox: warning: CPU load 2.18 │ Monitoring • hosts: 3 │ │
|
┌Backups──────────────────────────────────────────────────────┐│✔ postgresql 0.1 GB 0.0% 378 MB │
|
||||||
│ srv01: pending: awaiting metrics │ Data source: ZMQ – connected │ │
|
│ Backup Status Details ││ 1 active connections │
|
||||||
│ labbox: pending: awaiting metrics │ Active host: cmbox (1/3) │ │
|
│✔ Latest 3h ago 1.4 GiB ││✔ redis-immich 0.0 GB 0.4% <1 MB │
|
||||||
└─────────────────────────────────────────────────────────────────────┘
|
│ 8 archives, 2.4 GiB total ││✔ sshd 0.0 GB 0.0% <1 MB │
|
||||||
|
│✔ Disk ok 2.4/468 GB (1%) ││ 1 SSH connection │
|
||||||
|
│ ││✔ unifi 0.9/2.0 GB 0.4% 391 MB │
|
||||||
|
│ ││ │
|
||||||
|
└─────────────────────────────────────────────────────────────┘└────────────────────────────────────────────────────────────┘
|
||||||
|
┌Hosts────────────────────────────────────────────────────────┐
|
||||||
|
│ Host Status Timestamp │
|
||||||
|
│✔ cmbox ok 2025-10-13 05:45:28 │
|
||||||
|
│✔ srv01 ok 2025-10-13 05:45:28 │
|
||||||
|
│? labbox No data received — │
|
||||||
|
│ │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
Keys: [←→] hosts [r]efresh [q]uit
|
Keys: [←→] hosts [r]efresh [q]uit
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -176,6 +187,46 @@ sudo cm-dashboard-agent \
|
|||||||
--interval 5000
|
--interval 5000
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Widget Layout
|
||||||
|
|
||||||
|
### Services Widget Structure
|
||||||
|
The Services widget now displays both system metrics and services in a unified table:
|
||||||
|
|
||||||
|
```
|
||||||
|
┌Services────────────────────────────────────────────────────┐
|
||||||
|
│ Service Memory (GB) CPU Disk │
|
||||||
|
│✔ Service Memory 7.1/23899.7 MiB — │ ← System metric as service row
|
||||||
|
│✔ Disk Usage — — 45/100 GB │ ← System metric as service row
|
||||||
|
│⚠ CPU Load — 2.18 — │ ← System metric as service row
|
||||||
|
│✔ CPU Temperature — 47.0°C — │ ← System metric as service row
|
||||||
|
│✔ docker-registry 0.0 GB 0.0% <1 MB │ ← Regular service
|
||||||
|
│✔ nginx 0.0/1.0 GB 0.0% <1 MB │ ← Regular service
|
||||||
|
│✔ ├─ docker.cmtec.se │ ← Nginx site (sub-service)
|
||||||
|
│✔ ├─ git.cmtec.se │ ← Nginx site (sub-service)
|
||||||
|
│✔ └─ gitea.cmtec.se │ ← Nginx site (sub-service)
|
||||||
|
│✔ sshd 0.0 GB 0.0% <1 MB │ ← Regular service
|
||||||
|
│ 1 SSH connection │ ← Service description
|
||||||
|
└────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
**Row Types:**
|
||||||
|
- **System Metrics**: CPU Load, Service Memory, Disk Usage, CPU Temperature with status indicators
|
||||||
|
- **Regular Services**: Full resource data (memory, CPU, disk) with optional description lines
|
||||||
|
- **Sub-services**: Nginx sites with tree structure, status indicators only (no resource columns)
|
||||||
|
- **Description Lines**: Connection counts and service-specific info without status indicators
|
||||||
|
|
||||||
|
### Hosts Widget (formerly Alerts)
|
||||||
|
The Hosts widget provides a summary view of all monitored hosts:
|
||||||
|
|
||||||
|
```
|
||||||
|
┌Hosts────────────────────────────────────────────────────────┐
|
||||||
|
│ Host Status Timestamp │
|
||||||
|
│✔ cmbox ok 2025-10-13 05:45:28 │
|
||||||
|
│✔ srv01 ok 2025-10-13 05:45:28 │
|
||||||
|
│? labbox No data received — │
|
||||||
|
└─────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
## Monitoring Components
|
## Monitoring Components
|
||||||
|
|
||||||
### System Collector
|
### System Collector
|
||||||
@ -185,9 +236,12 @@ sudo cm-dashboard-agent \
|
|||||||
- **C-States**: Power management state distribution (C0-C10)
|
- **C-States**: Power management state distribution (C0-C10)
|
||||||
|
|
||||||
### Service Collector
|
### Service Collector
|
||||||
- **Systemd Services**: Auto-discovery of interesting services
|
- **System Metrics as Services**: CPU Load, Service Memory, Disk Usage, CPU Temperature displayed as individual service rows
|
||||||
- **Resource Usage**: Per-service memory and disk consumption
|
- **Systemd Services**: Auto-discovery of interesting services with resource monitoring
|
||||||
- **Service Health**: Running/stopped status with detailed failure info
|
- **Nginx Site Monitoring**: Individual rows for each nginx virtual host with tree structure (`├─` and `└─`)
|
||||||
|
- **Resource Usage**: Per-service memory, CPU, and disk consumption
|
||||||
|
- **Service Health**: Running/stopped/degraded status with detailed failure info
|
||||||
|
- **Connection Tracking**: SSH connections, database connections as description lines
|
||||||
|
|
||||||
### SMART Collector
|
### SMART Collector
|
||||||
- **NVMe Health**: Temperature, wear leveling, spare blocks
|
- **NVMe Health**: Temperature, wear leveling, spare blocks
|
||||||
|
|||||||
@ -114,6 +114,7 @@ impl ServiceCollector {
|
|||||||
sandbox_limit: None, // TODO: Implement sandbox limit detection
|
sandbox_limit: None, // TODO: Implement sandbox limit detection
|
||||||
disk_used_gb,
|
disk_used_gb,
|
||||||
description,
|
description,
|
||||||
|
sub_service: false,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -404,6 +405,226 @@ impl ServiceCollector {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn add_system_metrics_as_services(&self, services: &mut Vec<ServiceData>, healthy: &mut usize, degraded: &mut usize, failed: &mut usize) {
|
||||||
|
// Get system metrics
|
||||||
|
if let Ok((load_1, _load_5, _load_15)) = self.get_cpu_load().await {
|
||||||
|
let cpu_status = self.determine_cpu_status(load_1);
|
||||||
|
match cpu_status.as_str() {
|
||||||
|
"ok" => *healthy += 1,
|
||||||
|
"warning" => *degraded += 1,
|
||||||
|
"critical" => *failed += 1,
|
||||||
|
_ => *degraded += 1,
|
||||||
|
}
|
||||||
|
services.push(ServiceData {
|
||||||
|
name: "CPU Load".to_string(),
|
||||||
|
status: match cpu_status.as_str() {
|
||||||
|
"ok" => ServiceStatus::Running,
|
||||||
|
"warning" => ServiceStatus::Degraded,
|
||||||
|
"critical" => ServiceStatus::Stopped,
|
||||||
|
_ => ServiceStatus::Degraded,
|
||||||
|
},
|
||||||
|
memory_used_mb: 0.0,
|
||||||
|
memory_quota_mb: 0.0,
|
||||||
|
cpu_percent: load_1,
|
||||||
|
sandbox_limit: None,
|
||||||
|
disk_used_gb: 0.0,
|
||||||
|
description: None,
|
||||||
|
sub_service: false,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Ok((memory_used_mb, memory_total_mb)) = self.get_memory_info().await {
|
||||||
|
let memory_status = self.determine_memory_status(memory_used_mb, memory_total_mb);
|
||||||
|
match memory_status.as_str() {
|
||||||
|
"ok" => *healthy += 1,
|
||||||
|
"warning" => *degraded += 1,
|
||||||
|
"critical" => *failed += 1,
|
||||||
|
_ => *degraded += 1,
|
||||||
|
}
|
||||||
|
services.push(ServiceData {
|
||||||
|
name: "Service Memory".to_string(),
|
||||||
|
status: match memory_status.as_str() {
|
||||||
|
"ok" => ServiceStatus::Running,
|
||||||
|
"warning" => ServiceStatus::Degraded,
|
||||||
|
"critical" => ServiceStatus::Stopped,
|
||||||
|
_ => ServiceStatus::Degraded,
|
||||||
|
},
|
||||||
|
memory_used_mb,
|
||||||
|
memory_quota_mb: memory_total_mb,
|
||||||
|
cpu_percent: 0.0,
|
||||||
|
sandbox_limit: None,
|
||||||
|
disk_used_gb: 0.0,
|
||||||
|
description: None,
|
||||||
|
sub_service: false,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(cpu_temp) = self.get_cpu_temperature().await {
|
||||||
|
let temp_status = self.determine_cpu_temp_status(cpu_temp);
|
||||||
|
match temp_status.as_str() {
|
||||||
|
"ok" => *healthy += 1,
|
||||||
|
"warning" => *degraded += 1,
|
||||||
|
"critical" => *failed += 1,
|
||||||
|
_ => *degraded += 1,
|
||||||
|
}
|
||||||
|
services.push(ServiceData {
|
||||||
|
name: "CPU Temperature".to_string(),
|
||||||
|
status: match temp_status.as_str() {
|
||||||
|
"ok" => ServiceStatus::Running,
|
||||||
|
"warning" => ServiceStatus::Degraded,
|
||||||
|
"critical" => ServiceStatus::Stopped,
|
||||||
|
_ => ServiceStatus::Degraded,
|
||||||
|
},
|
||||||
|
memory_used_mb: 0.0,
|
||||||
|
memory_quota_mb: 0.0,
|
||||||
|
cpu_percent: cpu_temp,
|
||||||
|
sandbox_limit: None,
|
||||||
|
disk_used_gb: 0.0,
|
||||||
|
description: None,
|
||||||
|
sub_service: false,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Ok(disk_usage) = self.get_disk_usage().await {
|
||||||
|
services.push(ServiceData {
|
||||||
|
name: "Disk Usage".to_string(),
|
||||||
|
status: ServiceStatus::Running,
|
||||||
|
memory_used_mb: 0.0,
|
||||||
|
memory_quota_mb: 0.0,
|
||||||
|
cpu_percent: 0.0,
|
||||||
|
sandbox_limit: None,
|
||||||
|
disk_used_gb: disk_usage.used_gb,
|
||||||
|
description: None,
|
||||||
|
sub_service: false,
|
||||||
|
});
|
||||||
|
*healthy += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_cpu_load(&self) -> Result<(f32, f32, f32), CollectorError> {
|
||||||
|
let output = tokio::process::Command::new("uptime")
|
||||||
|
.output()
|
||||||
|
.await
|
||||||
|
.map_err(|e| CollectorError::CommandFailed {
|
||||||
|
command: "uptime".to_string(),
|
||||||
|
message: e.to_string()
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let uptime_str = String::from_utf8_lossy(&output.stdout);
|
||||||
|
|
||||||
|
if let Some(load_part) = uptime_str.split("load average:").nth(1) {
|
||||||
|
let load_str = load_part.trim();
|
||||||
|
let loads: Vec<&str> = load_str.split(", ").collect();
|
||||||
|
if loads.len() >= 3 {
|
||||||
|
let load_1 = loads[0].trim().replace(',', ".").parse::<f32>()
|
||||||
|
.map_err(|_| CollectorError::ParseError { message: "Failed to parse 1min load".to_string() })?;
|
||||||
|
let load_5 = loads[1].trim().replace(',', ".").parse::<f32>()
|
||||||
|
.map_err(|_| CollectorError::ParseError { message: "Failed to parse 5min load".to_string() })?;
|
||||||
|
let load_15 = loads[2].trim().replace(',', ".").parse::<f32>()
|
||||||
|
.map_err(|_| CollectorError::ParseError { message: "Failed to parse 15min load".to_string() })?;
|
||||||
|
|
||||||
|
return Ok((load_1, load_5, load_15));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(CollectorError::ParseError { message: "Failed to parse load averages".to_string() })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Best-effort CPU temperature read from sysfs thermal zones.
///
/// Scans thermal_zone0..9 in two passes: the first pass only accepts zones
/// explicitly labelled as CPU sensors ("x86_pkg_temp" or a type containing
/// "coretemp"); the second pass falls back to the first plausible reading
/// from any zone (e.g. on non-x86 platforms where zone types differ).
/// Returns `None` when no plausible reading is found.
async fn get_cpu_temperature(&self) -> Option<f32> {
    // Pass 1: prefer zones identified as CPU package/core sensors.
    for i in 0..10 {
        let type_path = format!("/sys/class/thermal/thermal_zone{}/type", i);
        let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", i);

        if let (Ok(zone_type), Ok(temp_str)) = (
            fs::read_to_string(&type_path).await,
            fs::read_to_string(&temp_path).await,
        ) {
            let zone_type = zone_type.trim();
            if let Ok(temp_millic) = temp_str.trim().parse::<f32>() {
                // sysfs reports millidegrees Celsius.
                let temp_c = temp_millic / 1000.0;
                // Sanity window to discard obviously bogus sensor values.
                if temp_c > 20.0 && temp_c < 150.0 {
                    if zone_type == "x86_pkg_temp" || zone_type.contains("coretemp") {
                        return Some(temp_c);
                    }
                }
            }
        }
    }

    // Pass 2: no CPU-specific zone found — accept any plausible reading.
    for i in 0..10 {
        let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", i);
        if let Ok(temp_str) = fs::read_to_string(&temp_path).await {
            if let Ok(temp_millic) = temp_str.trim().parse::<f32>() {
                let temp_c = temp_millic / 1000.0;
                if temp_c > 20.0 && temp_c < 150.0 {
                    return Some(temp_c);
                }
            }
        }
    }
    None
}
|
||||||
|
|
||||||
|
async fn get_memory_info(&self) -> Result<(f32, f32), CollectorError> {
|
||||||
|
let meminfo = fs::read_to_string("/proc/meminfo")
|
||||||
|
.await
|
||||||
|
.map_err(|e| CollectorError::IoError { message: format!("Failed to read /proc/meminfo: {}", e) })?;
|
||||||
|
|
||||||
|
let mut mem_total = None;
|
||||||
|
let mut mem_available = None;
|
||||||
|
|
||||||
|
for line in meminfo.lines() {
|
||||||
|
if let Some(value_str) = line.strip_prefix("MemTotal:") {
|
||||||
|
if let Some(kb_str) = value_str.trim().split_whitespace().next() {
|
||||||
|
mem_total = kb_str.parse::<f32>().ok().map(|kb| kb / 1024.0);
|
||||||
|
}
|
||||||
|
} else if let Some(value_str) = line.strip_prefix("MemAvailable:") {
|
||||||
|
if let Some(kb_str) = value_str.trim().split_whitespace().next() {
|
||||||
|
mem_available = kb_str.parse::<f32>().ok().map(|kb| kb / 1024.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
match (mem_total, mem_available) {
|
||||||
|
(Some(total), Some(available)) => {
|
||||||
|
let used = total - available;
|
||||||
|
Ok((used, total))
|
||||||
|
}
|
||||||
|
_ => Err(CollectorError::ParseError { message: "Failed to parse memory info".to_string() }),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn determine_cpu_status(&self, load_1: f32) -> String {
|
||||||
|
if load_1 >= 8.0 {
|
||||||
|
"critical".to_string()
|
||||||
|
} else if load_1 >= 5.0 {
|
||||||
|
"warning".to_string()
|
||||||
|
} else {
|
||||||
|
"ok".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn determine_memory_status(&self, used_mb: f32, total_mb: f32) -> String {
|
||||||
|
let usage_percent = (used_mb / total_mb) * 100.0;
|
||||||
|
if usage_percent >= 95.0 {
|
||||||
|
"critical".to_string()
|
||||||
|
} else if usage_percent >= 80.0 {
|
||||||
|
"warning".to_string()
|
||||||
|
} else {
|
||||||
|
"ok".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn determine_cpu_temp_status(&self, temp_c: f32) -> String {
|
||||||
|
if temp_c >= 100.0 {
|
||||||
|
"critical".to_string()
|
||||||
|
} else if temp_c >= 100.0 {
|
||||||
|
"warning".to_string()
|
||||||
|
} else {
|
||||||
|
"ok".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async fn get_service_description_with_cache(&self, service: &str) -> Option<Vec<String>> {
|
async fn get_service_description_with_cache(&self, service: &str) -> Option<Vec<String>> {
|
||||||
// Check if we should update the cache (throttled)
|
// Check if we should update the cache (throttled)
|
||||||
let should_update = self.should_update_description(service).await;
|
let should_update = self.should_update_description(service).await;
|
||||||
@ -676,7 +897,7 @@ impl ServiceCollector {
|
|||||||
let mut accessible_sites = Vec::new();
|
let mut accessible_sites = Vec::new();
|
||||||
for site in sites {
|
for site in sites {
|
||||||
if self.check_site_accessibility(&site).await {
|
if self.check_site_accessibility(&site).await {
|
||||||
accessible_sites.push(format!("{} ✓", site));
|
accessible_sites.push(site); // Remove checkmark - status will be shown via sub_service row status
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -791,6 +1012,9 @@ impl Collector for ServiceCollector {
|
|||||||
let mut total_memory_quota = 0.0;
|
let mut total_memory_quota = 0.0;
|
||||||
let mut total_disk_used = 0.0;
|
let mut total_disk_used = 0.0;
|
||||||
|
|
||||||
|
// Add system metrics as service entries first
|
||||||
|
self.add_system_metrics_as_services(&mut services, &mut healthy, &mut degraded, &mut failed).await;
|
||||||
|
|
||||||
// Collect data from all configured services
|
// Collect data from all configured services
|
||||||
for service in &self.services {
|
for service in &self.services {
|
||||||
match self.get_service_status(service).await {
|
match self.get_service_status(service).await {
|
||||||
@ -807,8 +1031,40 @@ impl Collector for ServiceCollector {
|
|||||||
}
|
}
|
||||||
total_disk_used += service_data.disk_used_gb;
|
total_disk_used += service_data.disk_used_gb;
|
||||||
|
|
||||||
|
// Handle nginx specially - create sub-services for sites
|
||||||
|
if service == "nginx" && matches!(service_data.status, ServiceStatus::Running) {
|
||||||
|
// Clear nginx description - sites will become individual sub-services
|
||||||
|
let mut nginx_service = service_data;
|
||||||
|
nginx_service.description = None;
|
||||||
|
services.push(nginx_service);
|
||||||
|
|
||||||
|
// Add nginx sites as individual sub-services
|
||||||
|
if let Some(sites) = self.get_nginx_sites().await {
|
||||||
|
for (i, site) in sites.iter().enumerate() {
|
||||||
|
let site_name = if i == sites.len() - 1 {
|
||||||
|
format!("└─ {}", site)
|
||||||
|
} else {
|
||||||
|
format!("├─ {}", site)
|
||||||
|
};
|
||||||
|
|
||||||
|
services.push(ServiceData {
|
||||||
|
name: site_name,
|
||||||
|
status: ServiceStatus::Running, // Assume sites are running if nginx is running
|
||||||
|
memory_used_mb: 0.0,
|
||||||
|
memory_quota_mb: 0.0,
|
||||||
|
cpu_percent: 0.0,
|
||||||
|
sandbox_limit: None,
|
||||||
|
disk_used_gb: 0.0,
|
||||||
|
description: None,
|
||||||
|
sub_service: true,
|
||||||
|
});
|
||||||
|
healthy += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
services.push(service_data);
|
services.push(service_data);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
failed += 1;
|
failed += 1;
|
||||||
// Add a placeholder service entry for failed collection
|
// Add a placeholder service entry for failed collection
|
||||||
@ -821,6 +1077,7 @@ impl Collector for ServiceCollector {
|
|||||||
sandbox_limit: None,
|
sandbox_limit: None,
|
||||||
disk_used_gb: 0.0,
|
disk_used_gb: 0.0,
|
||||||
description: None,
|
description: None,
|
||||||
|
sub_service: false,
|
||||||
});
|
});
|
||||||
tracing::warn!("Failed to collect metrics for service {}: {}", service, e);
|
tracing::warn!("Failed to collect metrics for service {}: {}", service, e);
|
||||||
}
|
}
|
||||||
@ -879,6 +1136,8 @@ struct ServiceData {
|
|||||||
disk_used_gb: f32,
|
disk_used_gb: f32,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
description: Option<Vec<String>>,
|
description: Option<Vec<String>>,
|
||||||
|
#[serde(default)]
|
||||||
|
sub_service: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
|
|||||||
@ -116,6 +116,8 @@ pub struct ServiceInfo {
|
|||||||
pub disk_used_gb: f32,
|
pub disk_used_gb: f32,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub description: Option<Vec<String>>,
|
pub description: Option<Vec<String>>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub sub_service: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
|||||||
@ -6,7 +6,7 @@ use ratatui::Frame;
|
|||||||
|
|
||||||
use crate::app::App;
|
use crate::app::App;
|
||||||
|
|
||||||
use super::{alerts, backup, services, storage, system};
|
use super::{hosts, backup, services, storage, system};
|
||||||
|
|
||||||
pub fn render(frame: &mut Frame, app: &App) {
|
pub fn render(frame: &mut Frame, app: &App) {
|
||||||
let host_summaries = app.host_display_data();
|
let host_summaries = app.host_display_data();
|
||||||
@ -56,7 +56,7 @@ pub fn render(frame: &mut Frame, app: &App) {
|
|||||||
backup::render(frame, primary_host.as_ref(), left_widgets[2]);
|
backup::render(frame, primary_host.as_ref(), left_widgets[2]);
|
||||||
services::render(frame, primary_host.as_ref(), services_area);
|
services::render(frame, primary_host.as_ref(), services_area);
|
||||||
|
|
||||||
alerts::render(frame, &host_summaries, left_side[1]);
|
hosts::render(frame, &host_summaries, left_side[1]);
|
||||||
|
|
||||||
if app.help_visible() {
|
if app.help_visible() {
|
||||||
render_help(frame, size);
|
render_help(frame, size);
|
||||||
|
|||||||
@ -9,13 +9,13 @@ use crate::ui::widget::{render_widget_data, WidgetData, WidgetStatus, StatusLeve
|
|||||||
pub fn render(frame: &mut Frame, hosts: &[HostDisplayData], area: Rect) {
|
pub fn render(frame: &mut Frame, hosts: &[HostDisplayData], area: Rect) {
|
||||||
let (severity, _ok_count, _warn_count, _fail_count) = classify_hosts(hosts);
|
let (severity, _ok_count, _warn_count, _fail_count) = classify_hosts(hosts);
|
||||||
|
|
||||||
let title = "Alerts".to_string();
|
let title = "Hosts".to_string();
|
||||||
|
|
||||||
let widget_status = match severity {
|
let widget_status = match severity {
|
||||||
AlertSeverity::Critical => StatusLevel::Error,
|
HostSeverity::Critical => StatusLevel::Error,
|
||||||
AlertSeverity::Warning => StatusLevel::Warning,
|
HostSeverity::Warning => StatusLevel::Warning,
|
||||||
AlertSeverity::Healthy => StatusLevel::Ok,
|
HostSeverity::Healthy => StatusLevel::Ok,
|
||||||
AlertSeverity::Unknown => StatusLevel::Unknown,
|
HostSeverity::Unknown => StatusLevel::Unknown,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut data = WidgetData::new(
|
let mut data = WidgetData::new(
|
||||||
@ -38,10 +38,10 @@ pub fn render(frame: &mut Frame, hosts: &[HostDisplayData], area: Rect) {
|
|||||||
for host in hosts {
|
for host in hosts {
|
||||||
let (status_text, severity, _emphasize) = host_status(host);
|
let (status_text, severity, _emphasize) = host_status(host);
|
||||||
let status_level = match severity {
|
let status_level = match severity {
|
||||||
AlertSeverity::Critical => StatusLevel::Error,
|
HostSeverity::Critical => StatusLevel::Error,
|
||||||
AlertSeverity::Warning => StatusLevel::Warning,
|
HostSeverity::Warning => StatusLevel::Warning,
|
||||||
AlertSeverity::Healthy => StatusLevel::Ok,
|
HostSeverity::Healthy => StatusLevel::Ok,
|
||||||
AlertSeverity::Unknown => StatusLevel::Unknown,
|
HostSeverity::Unknown => StatusLevel::Unknown,
|
||||||
};
|
};
|
||||||
let update = latest_timestamp(host)
|
let update = latest_timestamp(host)
|
||||||
.map(|ts| ts.format("%Y-%m-%d %H:%M:%S").to_string())
|
.map(|ts| ts.format("%Y-%m-%d %H:%M:%S").to_string())
|
||||||
@ -63,14 +63,14 @@ pub fn render(frame: &mut Frame, hosts: &[HostDisplayData], area: Rect) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone, Eq, PartialEq)]
|
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||||
enum AlertSeverity {
|
enum HostSeverity {
|
||||||
Healthy,
|
Healthy,
|
||||||
Warning,
|
Warning,
|
||||||
Critical,
|
Critical,
|
||||||
Unknown,
|
Unknown,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn classify_hosts(hosts: &[HostDisplayData]) -> (AlertSeverity, usize, usize, usize) {
|
fn classify_hosts(hosts: &[HostDisplayData]) -> (HostSeverity, usize, usize, usize) {
|
||||||
let mut ok = 0;
|
let mut ok = 0;
|
||||||
let mut warn = 0;
|
let mut warn = 0;
|
||||||
let mut fail = 0;
|
let mut fail = 0;
|
||||||
@ -78,81 +78,81 @@ fn classify_hosts(hosts: &[HostDisplayData]) -> (AlertSeverity, usize, usize, us
|
|||||||
for host in hosts {
|
for host in hosts {
|
||||||
let severity = host_severity(host);
|
let severity = host_severity(host);
|
||||||
match severity {
|
match severity {
|
||||||
AlertSeverity::Healthy => ok += 1,
|
HostSeverity::Healthy => ok += 1,
|
||||||
AlertSeverity::Warning => warn += 1,
|
HostSeverity::Warning => warn += 1,
|
||||||
AlertSeverity::Critical => fail += 1,
|
HostSeverity::Critical => fail += 1,
|
||||||
AlertSeverity::Unknown => warn += 1,
|
HostSeverity::Unknown => warn += 1,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let highest = if fail > 0 {
|
let highest = if fail > 0 {
|
||||||
AlertSeverity::Critical
|
HostSeverity::Critical
|
||||||
} else if warn > 0 {
|
} else if warn > 0 {
|
||||||
AlertSeverity::Warning
|
HostSeverity::Warning
|
||||||
} else if ok > 0 {
|
} else if ok > 0 {
|
||||||
AlertSeverity::Healthy
|
HostSeverity::Healthy
|
||||||
} else {
|
} else {
|
||||||
AlertSeverity::Unknown
|
HostSeverity::Unknown
|
||||||
};
|
};
|
||||||
|
|
||||||
(highest, ok, warn, fail)
|
(highest, ok, warn, fail)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn host_severity(host: &HostDisplayData) -> AlertSeverity {
|
fn host_severity(host: &HostDisplayData) -> HostSeverity {
|
||||||
// Check connection status first
|
// Check connection status first
|
||||||
match host.connection_status {
|
match host.connection_status {
|
||||||
ConnectionStatus::Error => return AlertSeverity::Critical,
|
ConnectionStatus::Error => return HostSeverity::Critical,
|
||||||
ConnectionStatus::Timeout => return AlertSeverity::Warning,
|
ConnectionStatus::Timeout => return HostSeverity::Warning,
|
||||||
ConnectionStatus::Unknown => return AlertSeverity::Unknown,
|
ConnectionStatus::Unknown => return HostSeverity::Unknown,
|
||||||
ConnectionStatus::Connected => {}, // Continue with other checks
|
ConnectionStatus::Connected => {}, // Continue with other checks
|
||||||
}
|
}
|
||||||
|
|
||||||
if host.last_error.is_some() {
|
if host.last_error.is_some() {
|
||||||
return AlertSeverity::Critical;
|
return HostSeverity::Critical;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(smart) = host.smart.as_ref() {
|
if let Some(smart) = host.smart.as_ref() {
|
||||||
if smart.summary.critical > 0 {
|
if smart.summary.critical > 0 {
|
||||||
return AlertSeverity::Critical;
|
return HostSeverity::Critical;
|
||||||
}
|
}
|
||||||
if smart.summary.warning > 0 || !smart.issues.is_empty() {
|
if smart.summary.warning > 0 || !smart.issues.is_empty() {
|
||||||
return AlertSeverity::Warning;
|
return HostSeverity::Warning;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(services) = host.services.as_ref() {
|
if let Some(services) = host.services.as_ref() {
|
||||||
if services.summary.failed > 0 {
|
if services.summary.failed > 0 {
|
||||||
return AlertSeverity::Critical;
|
return HostSeverity::Critical;
|
||||||
}
|
}
|
||||||
if services.summary.degraded > 0 {
|
if services.summary.degraded > 0 {
|
||||||
return AlertSeverity::Warning;
|
return HostSeverity::Warning;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Update to use agent-provided system statuses instead of evaluate_performance
|
// TODO: Update to use agent-provided system statuses instead of evaluate_performance
|
||||||
// let (perf_severity, _) = evaluate_performance(&services.summary);
|
// let (perf_severity, _) = evaluate_performance(&services.summary);
|
||||||
// match perf_severity {
|
// match perf_severity {
|
||||||
// PerfSeverity::Critical => return AlertSeverity::Critical,
|
// PerfSeverity::Critical => return HostSeverity::Critical,
|
||||||
// PerfSeverity::Warning => return AlertSeverity::Warning,
|
// PerfSeverity::Warning => return HostSeverity::Warning,
|
||||||
// PerfSeverity::Ok => {}
|
// PerfSeverity::Ok => {}
|
||||||
// }
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(backup) = host.backup.as_ref() {
|
if let Some(backup) = host.backup.as_ref() {
|
||||||
match backup.overall_status.as_str() {
|
match backup.overall_status.as_str() {
|
||||||
"critical" => return AlertSeverity::Critical,
|
"critical" => return HostSeverity::Critical,
|
||||||
"warning" => return AlertSeverity::Warning,
|
"warning" => return HostSeverity::Warning,
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if host.smart.is_none() && host.services.is_none() && host.backup.is_none() {
|
if host.smart.is_none() && host.services.is_none() && host.backup.is_none() {
|
||||||
AlertSeverity::Unknown
|
HostSeverity::Unknown
|
||||||
} else {
|
} else {
|
||||||
AlertSeverity::Healthy
|
HostSeverity::Healthy
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
fn host_status(host: &HostDisplayData) -> (String, HostSeverity, bool) {
|
||||||
// Check connection status first
|
// Check connection status first
|
||||||
match host.connection_status {
|
match host.connection_status {
|
||||||
ConnectionStatus::Error => {
|
ConnectionStatus::Error => {
|
||||||
@ -161,7 +161,7 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
|||||||
} else {
|
} else {
|
||||||
"Connection error".to_string()
|
"Connection error".to_string()
|
||||||
};
|
};
|
||||||
return (msg, AlertSeverity::Critical, true);
|
return (msg, HostSeverity::Critical, true);
|
||||||
},
|
},
|
||||||
ConnectionStatus::Timeout => {
|
ConnectionStatus::Timeout => {
|
||||||
let msg = if let Some(error) = &host.last_error {
|
let msg = if let Some(error) = &host.last_error {
|
||||||
@ -169,28 +169,28 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
|||||||
} else {
|
} else {
|
||||||
"Keep-alive timeout".to_string()
|
"Keep-alive timeout".to_string()
|
||||||
};
|
};
|
||||||
return (msg, AlertSeverity::Warning, true);
|
return (msg, HostSeverity::Warning, true);
|
||||||
},
|
},
|
||||||
ConnectionStatus::Unknown => {
|
ConnectionStatus::Unknown => {
|
||||||
return ("No data received".to_string(), AlertSeverity::Unknown, true);
|
return ("No data received".to_string(), HostSeverity::Unknown, true);
|
||||||
},
|
},
|
||||||
ConnectionStatus::Connected => {}, // Continue with other checks
|
ConnectionStatus::Connected => {}, // Continue with other checks
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(error) = &host.last_error {
|
if let Some(error) = &host.last_error {
|
||||||
return (format!("error: {}", error), AlertSeverity::Critical, true);
|
return (format!("error: {}", error), HostSeverity::Critical, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(smart) = host.smart.as_ref() {
|
if let Some(smart) = host.smart.as_ref() {
|
||||||
if smart.summary.critical > 0 {
|
if smart.summary.critical > 0 {
|
||||||
return (
|
return (
|
||||||
"critical: SMART critical".to_string(),
|
"critical: SMART critical".to_string(),
|
||||||
AlertSeverity::Critical,
|
HostSeverity::Critical,
|
||||||
true,
|
true,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if let Some(issue) = smart.issues.first() {
|
if let Some(issue) = smart.issues.first() {
|
||||||
return (format!("warning: {}", issue), AlertSeverity::Warning, true);
|
return (format!("warning: {}", issue), HostSeverity::Warning, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -198,14 +198,14 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
|||||||
if services.summary.failed > 0 {
|
if services.summary.failed > 0 {
|
||||||
return (
|
return (
|
||||||
format!("critical: {} failed svc", services.summary.failed),
|
format!("critical: {} failed svc", services.summary.failed),
|
||||||
AlertSeverity::Critical,
|
HostSeverity::Critical,
|
||||||
true,
|
true,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if services.summary.degraded > 0 {
|
if services.summary.degraded > 0 {
|
||||||
return (
|
return (
|
||||||
format!("warning: {} degraded svc", services.summary.degraded),
|
format!("warning: {} degraded svc", services.summary.degraded),
|
||||||
AlertSeverity::Warning,
|
HostSeverity::Warning,
|
||||||
true,
|
true,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -217,14 +217,14 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
|||||||
// PerfSeverity::Critical => {
|
// PerfSeverity::Critical => {
|
||||||
// return (
|
// return (
|
||||||
// format!("critical: {}", reason_text),
|
// format!("critical: {}", reason_text),
|
||||||
// AlertSeverity::Critical,
|
// HostSeverity::Critical,
|
||||||
// true,
|
// true,
|
||||||
// );
|
// );
|
||||||
// }
|
// }
|
||||||
// PerfSeverity::Warning => {
|
// PerfSeverity::Warning => {
|
||||||
// return (
|
// return (
|
||||||
// format!("warning: {}", reason_text),
|
// format!("warning: {}", reason_text),
|
||||||
// AlertSeverity::Warning,
|
// HostSeverity::Warning,
|
||||||
// true,
|
// true,
|
||||||
// );
|
// );
|
||||||
// }
|
// }
|
||||||
@ -238,14 +238,14 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
|||||||
"critical" => {
|
"critical" => {
|
||||||
return (
|
return (
|
||||||
"critical: backup failed".to_string(),
|
"critical: backup failed".to_string(),
|
||||||
AlertSeverity::Critical,
|
HostSeverity::Critical,
|
||||||
true,
|
true,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
"warning" => {
|
"warning" => {
|
||||||
return (
|
return (
|
||||||
"warning: backup warning".to_string(),
|
"warning: backup warning".to_string(),
|
||||||
AlertSeverity::Warning,
|
HostSeverity::Warning,
|
||||||
true,
|
true,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -260,10 +260,10 @@ fn host_status(host: &HostDisplayData) -> (String, AlertSeverity, bool) {
|
|||||||
"pending: no recent data"
|
"pending: no recent data"
|
||||||
};
|
};
|
||||||
|
|
||||||
return (status.to_string(), AlertSeverity::Warning, false);
|
return (status.to_string(), HostSeverity::Warning, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
("ok".to_string(), AlertSeverity::Healthy, false)
|
("ok".to_string(), HostSeverity::Healthy, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1,4 +1,4 @@
|
|||||||
pub mod alerts;
|
pub mod hosts;
|
||||||
pub mod backup;
|
pub mod backup;
|
||||||
pub mod dashboard;
|
pub mod dashboard;
|
||||||
pub mod services;
|
pub mod services;
|
||||||
|
|||||||
@ -91,6 +91,20 @@ fn render_metrics(
|
|||||||
vec![]
|
vec![]
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if svc.sub_service {
|
||||||
|
// Sub-services only show name and status, no memory/CPU/disk data
|
||||||
|
data.add_row(
|
||||||
|
Some(WidgetStatus::new(status_level)),
|
||||||
|
description,
|
||||||
|
vec![
|
||||||
|
svc.name.clone(),
|
||||||
|
"".to_string(),
|
||||||
|
"".to_string(),
|
||||||
|
"".to_string(),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
// Regular services show all columns
|
||||||
data.add_row(
|
data.add_row(
|
||||||
Some(WidgetStatus::new(status_level)),
|
Some(WidgetStatus::new(status_level)),
|
||||||
description,
|
description,
|
||||||
@ -102,6 +116,7 @@ fn render_metrics(
|
|||||||
],
|
],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
render_widget_data(frame, area, data);
|
render_widget_data(frame, area, data);
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user