Compare commits
300 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1656f20e96 | |||
| dcd350ec2c | |||
| a34b095857 | |||
| 7362464b46 | |||
| c8b79576fa | |||
| f53df5440b | |||
| d1b0e2c431 | |||
| b1719a60fc | |||
| d922e8d6f3 | |||
| 407bc9dbc2 | |||
| 3c278351c9 | |||
| 8da4522d85 | |||
| 5b1e39cfca | |||
| ffecbc3166 | |||
| 49f9504429 | |||
| bc9015e96b | |||
| aaec8e691c | |||
| 4a8cfbbde4 | |||
| d93260529b | |||
| 41e1be451e | |||
| 2863526ec8 | |||
| 5da9213da6 | |||
| a7755f02ae | |||
| b886fb2045 | |||
| cfb02e1763 | |||
| 5b53ca3d52 | |||
| 92a30913b4 | |||
| a288a8ef9a | |||
| c65d596099 | |||
| 98ed17947d | |||
| 1cb6abf58a | |||
| 477724b4f4 | |||
| 7a3ed17952 | |||
| 7e1962a168 | |||
| 5bb7d6cf57 | |||
| 7a0dc27846 | |||
| 5bc250a738 | |||
| 5c3ac8b15e | |||
| bdfff942f7 | |||
| 47ab1e387d | |||
| 966ba27b1e | |||
| 6c6c9144bd | |||
| 3fdcec8047 | |||
| 1fcaf4a670 | |||
| 885e19f7fd | |||
| a7b69b8ae7 | |||
| 2d290f40b2 | |||
| ad1fcaa27b | |||
| 60ab4d4f9e | |||
| 67034c84b9 | |||
| c62c7fa698 | |||
| 0b1d8c0a73 | |||
| c77aa6eaaa | |||
| 8a0e68f0e3 | |||
| 2d653fe9ae | |||
| caba78004e | |||
| 77bf08a978 | |||
| 929870f8b6 | |||
| 7aae852b7b | |||
| 40f3ff66d8 | |||
| 1c1beddb55 | |||
| 620d1f10b6 | |||
| a0d571a40e | |||
| 977200fff3 | |||
| d692de5f83 | |||
| f5913dbd43 | |||
| faa30a7839 | |||
| 6e4a42799f | |||
| afb8d68e03 | |||
| 5e08b34280 | |||
| 0d8284b69c | |||
| d84690cb3b | |||
| 7c030b33d6 | |||
| c6817537a8 | |||
| 2189d34b16 | |||
| 28cfd5758f | |||
| 5deb8cf8d8 | |||
| 0e01813ff5 | |||
| c3c9507a42 | |||
| 4d77ffe17e | |||
| 14f74b4cac | |||
| 67b686f8c7 | |||
| e3996fdb84 | |||
| f94ca60e69 | |||
| c19ff56df8 | |||
| fe2f604703 | |||
| 8bfd416327 | |||
| 85c6c624fb | |||
| eab3f17428 | |||
| 7ad149bbe4 | |||
| b444c88ea0 | |||
| 317cf76bd1 | |||
| 0db1a165b9 | |||
| 3c2955376d | |||
| f09ccabc7f | |||
| 43dd5a901a | |||
| 01e1f33b66 | |||
| ed6399b914 | |||
| 14618c59c6 | |||
| 2740de9b54 | |||
| 37f2650200 | |||
| 833010e270 | |||
| 549d9d1c72 | |||
| 9b84b70581 | |||
| 92c3ee3f2a | |||
| 1be55f765d | |||
| 2f94a4b853 | |||
| ff2b43827a | |||
| fac0188c6f | |||
| 6bb350f016 | |||
| 374b126446 | |||
| 76c04633b5 | |||
| 1e0510be81 | |||
| 9a2df906ea | |||
| 6d6beb207d | |||
| 7a68da01f5 | |||
| 5be67fed64 | |||
| cac836601b | |||
| bd22ce265b | |||
| bbc8b7b1cb | |||
| 5dd8cadef3 | |||
| fefe30ec51 | |||
| fb40cce748 | |||
| eaa057b284 | |||
| f23a1b5cec | |||
| 3f98f68b51 | |||
| 3d38a7a984 | |||
| b0ee0242bd | |||
| 8f9e9eabca | |||
| 937f4ad427 | |||
| 8aefab83ae | |||
| 748a9f3a3b | |||
| 5c6b11c794 | |||
| 9f0aa5f806 | |||
| fc247bd0ad | |||
| 00fe8c28ab | |||
| fbbb4a4cfb | |||
| 53e1d8bbce | |||
| 1b9fecea98 | |||
| b7ffeaced5 | |||
| 3858309a5d | |||
| df104bf940 | |||
| d5ce36ee18 | |||
| 4f80701671 | |||
| 267654fda4 | |||
| dc1105eefe | |||
| c9d12793ef | |||
| 8f80015273 | |||
| 7a95a9d762 | |||
| 7b11db990c | |||
| 67b59e9551 | |||
| da37e28b6a | |||
| d89b3ac881 | |||
| 7f26991609 | |||
| 75ec190b93 | |||
| eb892096d9 | |||
| c006625a3f | |||
| dcd5fff8c1 | |||
| 9357e5f2a8 | |||
| d164c1da5f | |||
| b120f95f8a | |||
| 66ab7a492d | |||
| 4d615a7f45 | |||
| fd7ad23205 | |||
| 2b2cb2da3e | |||
| 11d1c2dc94 | |||
| bea2d120b5 | |||
| 5394164123 | |||
| 4329cd26e0 | |||
| b85bd6b153 | |||
| c9b2d5e342 | |||
| b2b301332f | |||
| adf3b0f51c | |||
| 41ded0170c | |||
| 9b4191b2c3 | |||
| 53dbb43352 | |||
| ba03623110 | |||
| f24c4ed650 | |||
| 86501fd486 | |||
| 192eea6e0c | |||
| 43fb838c9b | |||
| 54483653f9 | |||
| e47803b705 | |||
| 439d0d9af6 | |||
| 2242b5ddfe | |||
| 9d0f42d55c | |||
| 1da7b5f6e7 | |||
| 006f27f7d9 | |||
| 07422cd0a7 | |||
| de30b80219 | |||
| 7d96ca9fad | |||
| 9b940ebd19 | |||
| 6d4da1b7da | |||
| 1e7f1616aa | |||
| 7a3ee3d5ba | |||
| 0e8b149718 | |||
| 2c27d0e1db | |||
| 9f18488752 | |||
| fab6404cca | |||
| c3626cc362 | |||
| d68ecfbc64 | |||
| d1272a6c13 | |||
| 33b3beb342 | |||
| f9384d9df6 | |||
| 156d707377 | |||
| dc1a2e3a0f | |||
| 5d6b8e6253 | |||
| 0cba083305 | |||
| a6be7a4788 | |||
| 2384f7f9b9 | |||
| cd5ef65d3d | |||
| 7bf9ca6201 | |||
| f587b42797 | |||
| 7ae464e172 | |||
| 980c9a20a2 | |||
| 448a38dede | |||
| f12e20b0f3 | |||
| 564d1f37e7 | |||
| 65bfb9f617 | |||
| 4f4ef6259b | |||
| 505263cec6 | |||
| 61dd686fb9 | |||
| c0f7a97a6f | |||
| 9575077045 | |||
| 34a1f7b9dc | |||
| d11aa11f99 | |||
| 0ca06d2507 | |||
| 6693f3a05f | |||
| de252d27b9 | |||
| db0e41a7d3 | |||
| ec460496d8 | |||
| 33e700529e | |||
| d644b7d40a | |||
| f635ba9c75 | |||
| 76b6e3373e | |||
| 0a13cab897 | |||
| d33ec5d225 | |||
| d31c2384df | |||
| c8db463204 | |||
| e8e50ef9bb | |||
| 0faed9309e | |||
| c980346d05 | |||
| 3e3d3f0c2b | |||
| 9eb7444d56 | |||
| 278d1763aa | |||
| f874264e13 | |||
| 5f6e47ece5 | |||
| 0e7cf24dbb | |||
| 2d080a2f51 | |||
| 6179bd51a7 | |||
| 57de4c366a | |||
| e18778e962 | |||
| e4469a0ebf | |||
| 6fedf4c7fc | |||
| 3f6dffa66e | |||
| 1b64fbde3d | |||
| 4f4c3b0d6e | |||
| bd20f0cae1 | |||
| 11c9a5f9d2 | |||
| aeae60146d | |||
| a82c81e8e3 | |||
| c56e9d7be2 | |||
| c8f800a1e5 | |||
| fc6b3424cf | |||
| 35e06c6734 | |||
| 783d233319 | |||
| 6509a2b91a | |||
| 52f8c40b86 | |||
| a86b5ba8f9 | |||
| 1b964545be | |||
| 97aa1708c2 | |||
| d12689f3b5 | |||
| f22e3ee95e | |||
| e890c5e810 | |||
| 078c30a592 | |||
| a847674004 | |||
| 2618f6b62f | |||
| c3fc5a181d | |||
| 3f45a172b3 | |||
| 5b12c12228 | |||
| 651b801de3 | |||
| 71b9f93d7c | |||
| ae70946c61 | |||
| 2910b7d875 | |||
| 43242debce | |||
| a2519b2814 | |||
| 91f037aa3e | |||
| 627c533724 | |||
| b1bff4857b | |||
| f8a061d496 | |||
| e61a845965 | |||
| ac5d2d4db5 | |||
| 69892a2d84 | |||
| a928d73134 | |||
| af52d49194 | |||
| bc94f75328 | |||
| b6da71b7e7 | |||
| aaf7edfbce | |||
| bb72c42726 | |||
| af5f96ce2f |
@ -113,13 +113,13 @@ jobs:
|
|||||||
NIX_HASH="sha256-$(python3 -c "import base64, binascii; print(base64.b64encode(binascii.unhexlify('$NEW_HASH')).decode())")"
|
NIX_HASH="sha256-$(python3 -c "import base64, binascii; print(base64.b64encode(binascii.unhexlify('$NEW_HASH')).decode())")"
|
||||||
|
|
||||||
# Update the NixOS configuration
|
# Update the NixOS configuration
|
||||||
sed -i "s/version = \"v[^\"]*\"/version = \"$VERSION\"/" hosts/common/cm-dashboard.nix
|
sed -i "s|version = \"v[^\"]*\"|version = \"$VERSION\"|" services/cm-dashboard.nix
|
||||||
sed -i "s/sha256 = \"sha256-[^\"]*\"/sha256 = \"$NIX_HASH\"/" hosts/common/cm-dashboard.nix
|
sed -i "s|sha256 = \"sha256-[^\"]*\"|sha256 = \"$NIX_HASH\"|" services/cm-dashboard.nix
|
||||||
|
|
||||||
# Commit and push changes
|
# Commit and push changes
|
||||||
git config user.name "Gitea Actions"
|
git config user.name "Gitea Actions"
|
||||||
git config user.email "actions@gitea.cmtec.se"
|
git config user.email "actions@gitea.cmtec.se"
|
||||||
git add hosts/common/cm-dashboard.nix
|
git add services/cm-dashboard.nix
|
||||||
git commit -m "Auto-update cm-dashboard to $VERSION
|
git commit -m "Auto-update cm-dashboard to $VERSION
|
||||||
|
|
||||||
- Update version to $VERSION with automated release
|
- Update version to $VERSION with automated release
|
||||||
|
|||||||
@ -1,3 +0,0 @@
|
|||||||
# Agent Guide
|
|
||||||
|
|
||||||
Agents working in this repo must follow the instructions in `CLAUDE.md`.
|
|
||||||
571
CLAUDE.md
571
CLAUDE.md
@ -2,207 +2,173 @@
|
|||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure. Built to replace Glance with a custom solution tailored for our specific monitoring needs and ZMQ-based metric collection.
|
A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure. Built with ZMQ-based metric collection and individual metrics architecture.
|
||||||
|
|
||||||
## Implementation Strategy
|
## Current Features
|
||||||
|
|
||||||
### Current Implementation Status
|
### Core Functionality
|
||||||
|
|
||||||
**System Panel Enhancement - COMPLETED** ✅
|
- **Real-time Monitoring**: CPU, RAM, Storage, and Service status
|
||||||
|
- **Service Management**: Start/stop services with user-stopped tracking
|
||||||
|
- **Multi-host Support**: Monitor multiple servers from single dashboard
|
||||||
|
- **NixOS Integration**: System rebuild via SSH + tmux popup
|
||||||
|
- **Backup Monitoring**: Borgbackup status and scheduling
|
||||||
|
|
||||||
All system panel features successfully implemented:
|
### User-Stopped Service Tracking
|
||||||
- ✅ **NixOS Collector**: Created collector for version and active users
|
|
||||||
- ✅ **System Widget**: Unified widget combining NixOS, CPU, RAM, and Storage
|
|
||||||
- ✅ **Build Display**: Shows NixOS build information without codename
|
|
||||||
- ✅ **Active Users**: Displays currently logged in users
|
|
||||||
- ✅ **Tmpfs Monitoring**: Added /tmp usage to RAM section
|
|
||||||
- ✅ **Agent Deployment**: NixOS collector working in production
|
|
||||||
|
|
||||||
**Keyboard Navigation and Service Management - COMPLETED** ✅
|
- Services stopped via dashboard are marked as "user-stopped"
|
||||||
|
- User-stopped services report Status::OK instead of Warning
|
||||||
|
- Prevents false alerts during intentional maintenance
|
||||||
|
- Persistent storage survives agent restarts
|
||||||
|
- Automatic flag clearing when services are restarted via dashboard
|
||||||
|
|
||||||
All keyboard navigation and service selection features successfully implemented:
|
### Custom Service Logs
|
||||||
- ✅ **Panel Navigation**: Shift+Tab cycles through visible panels only (System → Services → Backup)
|
|
||||||
- ✅ **Service Selection**: Up/Down arrows navigate through parent services with visual cursor
|
|
||||||
- ✅ **Focus Management**: Selection highlighting only visible when Services panel focused
|
|
||||||
- ✅ **Status Preservation**: Service health colors maintained during selection (green/red icons)
|
|
||||||
- ✅ **Smart Panel Switching**: Only cycles through panels with data (backup panel conditional)
|
|
||||||
- ✅ **Scroll Support**: All panels support content scrolling with proper overflow indicators
|
|
||||||
|
|
||||||
**Current Status - October 25, 2025:**
|
- Configure service-specific log file paths per host in dashboard config
|
||||||
- All keyboard navigation features working correctly ✅
|
- Press `L` on any service to view custom log files via `tail -f`
|
||||||
- Service selection cursor implemented with focus-aware highlighting ✅
|
- Configuration format in dashboard config:
|
||||||
- Panel scrolling fixed for System, Services, and Backup panels ✅
|
|
||||||
- Build display working: "Build: 25.05.20251004.3bcc93c" ✅
|
|
||||||
- Configuration hash display: Currently shows git hash, needs to be fixed ❌
|
|
||||||
|
|
||||||
**Target Layout:**
|
```toml
|
||||||
```
|
[service_logs]
|
||||||
NixOS:
|
hostname1 = [
|
||||||
Build: 25.05.20251004.3bcc93c
|
{ service_name = "nginx", log_file_path = "/var/log/nginx/access.log" },
|
||||||
Config: d8ivwiar # Should show nix store hash (8 chars) from deployed system
|
{ service_name = "app", log_file_path = "/var/log/myapp/app.log" }
|
||||||
Active users: cm, simon
|
]
|
||||||
CPU:
|
hostname2 = [
|
||||||
● Load: 0.02 0.31 0.86 • 3000MHz
|
{ service_name = "database", log_file_path = "/var/log/postgres/postgres.log" }
|
||||||
RAM:
|
]
|
||||||
● Usage: 33% 2.6GB/7.6GB
|
|
||||||
● /tmp: 0% 0B/2.0GB
|
|
||||||
Storage:
|
|
||||||
● root (Single):
|
|
||||||
├─ ● nvme0n1 W: 1%
|
|
||||||
└─ ● 18% 167.4GB/928.2GB
|
|
||||||
```
|
```
|
||||||
|
|
||||||
**System panel layout fully implemented with blue tree symbols ✅**
|
### Service Management
|
||||||
**Tree symbols now use consistent blue theming across all panels ✅**
|
|
||||||
**Overflow handling restored for all widgets ("... and X more") ✅**
|
|
||||||
**Agent hash display working correctly ✅**
|
|
||||||
|
|
||||||
### Current Keyboard Navigation Implementation
|
- **Direct Control**: Arrow keys (↑↓) or vim keys (j/k) navigate services
|
||||||
|
- **Service Actions**:
|
||||||
|
- `s` - Start service (sends UserStart command)
|
||||||
|
- `S` - Stop service (sends UserStop command)
|
||||||
|
- `J` - Show service logs (journalctl in tmux popup)
|
||||||
|
- `L` - Show custom log files (tail -f custom paths in tmux popup)
|
||||||
|
- `R` - Rebuild current host
|
||||||
|
- **Visual Status**: Green ● (active), Yellow ◐ (inactive), Red ◯ (failed)
|
||||||
|
- **Transitional Icons**: Blue arrows during operations
|
||||||
|
|
||||||
**Navigation Controls:**
|
### Navigation
|
||||||
- **Tab**: Switch between hosts (cmbox, srv01, srv02, steambox, etc.)
|
|
||||||
- **Shift+Tab**: Cycle through visible panels (System → Services → Backup → System)
|
- **Tab**: Switch between hosts
|
||||||
- **Up/Down (System/Backup)**: Scroll through panel content
|
- **↑↓ or j/k**: Select services
|
||||||
- **Up/Down (Services)**: Move service selection cursor between parent services
|
- **s**: Start selected service (UserStart)
|
||||||
|
- **S**: Stop selected service (UserStop)
|
||||||
|
- **J**: Show service logs (journalctl)
|
||||||
|
- **L**: Show custom log files
|
||||||
|
- **R**: Rebuild current host
|
||||||
|
- **B**: Run backup on current host
|
||||||
- **q**: Quit dashboard
|
- **q**: Quit dashboard
|
||||||
|
|
||||||
**Panel-Specific Features:**
|
## Core Architecture Principles
|
||||||
- **System Panel**: Scrollable content with CPU, RAM, Storage details
|
|
||||||
- **Services Panel**: Service selection cursor for parent services only (docker, nginx, postgresql, etc.)
|
|
||||||
- **Backup Panel**: Scrollable repository list with proper overflow handling
|
|
||||||
|
|
||||||
**Visual Feedback:**
|
### Structured Data Architecture (✅ IMPLEMENTED v0.1.131)
|
||||||
- **Focused Panel**: Blue border and title highlighting
|
|
||||||
- **Service Selection**: Blue background with preserved status icon colors (green ● for active, red ● for failed)
|
|
||||||
- **Focus-Aware Selection**: Selection highlighting only visible when Services panel focused
|
|
||||||
- **Dynamic Statusbar**: Context-aware shortcuts based on focused panel
|
|
||||||
|
|
||||||
### Remote Command Execution - WORKING ✅
|
Complete migration from string-based metrics to structured JSON data. Eliminates all string parsing bugs and provides type-safe data access.
|
||||||
|
|
||||||
**All Issues Resolved (as of 2025-10-24):**
|
**Previous (String Metrics):**
|
||||||
- ✅ **ZMQ Command Protocol**: Extended with ServiceControl and SystemRebuild variants
|
|
||||||
- ✅ **Agent Handlers**: systemctl and nixos-rebuild execution with maintenance mode
|
|
||||||
- ✅ **Dashboard Integration**: Keyboard shortcuts execute commands
|
|
||||||
- ✅ **Service Control**: Fixed toggle logic - replaced with separate 's' (start) and 'S' (stop)
|
|
||||||
- ✅ **System Rebuild**: Fixed permission issues and sandboxing problems
|
|
||||||
- ✅ **Git Clone Approach**: Implemented for nixos-rebuild to avoid directory permissions
|
|
||||||
- ✅ **Visual Feedback**: Directional arrows for service status (↑ starting, ↓ stopping, ↻ restarting)
|
|
||||||
|
|
||||||
**Keyboard Controls Status:**
|
- ❌ Agent sent individual metrics with string names like `disk_nvme0n1_temperature`
|
||||||
- **Services Panel**:
|
- ❌ Dashboard parsed metric names with underscore counting and string splitting
|
||||||
- R (restart) ✅ Working
|
- ❌ Complex and error-prone metric filtering and extraction logic
|
||||||
- s (start) ✅ Working
|
|
||||||
- S (stop) ✅ Working
|
|
||||||
- **System Panel**: R (nixos-rebuild) ✅ Working with --option sandbox false
|
|
||||||
- **Backup Panel**: B (trigger backup) ❓ Not implemented
|
|
||||||
|
|
||||||
**Visual Feedback Implementation - IN PROGRESS:**
|
**Current (Structured Data):**
|
||||||
|
|
||||||
Context-appropriate progress indicators for each panel:
|
```json
|
||||||
|
{
|
||||||
**Services Panel** (Service status transitions):
|
"hostname": "cmbox",
|
||||||
```
|
"agent_version": "v0.1.131",
|
||||||
● nginx active → ⏳ nginx restarting → ● nginx active
|
"timestamp": 1763926877,
|
||||||
● docker active → ⏳ docker stopping → ● docker inactive
|
"system": {
|
||||||
|
"cpu": {
|
||||||
|
"load_1min": 3.5,
|
||||||
|
"load_5min": 3.57,
|
||||||
|
"load_15min": 3.58,
|
||||||
|
"frequency_mhz": 1500,
|
||||||
|
"temperature_celsius": 45.2
|
||||||
|
},
|
||||||
|
"memory": {
|
||||||
|
"usage_percent": 25.0,
|
||||||
|
"total_gb": 23.3,
|
||||||
|
"used_gb": 5.9,
|
||||||
|
"swap_total_gb": 10.7,
|
||||||
|
"swap_used_gb": 0.99,
|
||||||
|
"tmpfs": [
|
||||||
|
{
|
||||||
|
"mount": "/tmp",
|
||||||
|
"usage_percent": 15.0,
|
||||||
|
"used_gb": 0.3,
|
||||||
|
"total_gb": 2.0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"storage": {
|
||||||
|
"drives": [
|
||||||
|
{
|
||||||
|
"name": "nvme0n1",
|
||||||
|
"health": "PASSED",
|
||||||
|
"temperature_celsius": 29.0,
|
||||||
|
"wear_percent": 1.0,
|
||||||
|
"filesystems": [
|
||||||
|
{
|
||||||
|
"mount": "/",
|
||||||
|
"usage_percent": 24.0,
|
||||||
|
"used_gb": 224.9,
|
||||||
|
"total_gb": 928.2
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"pools": [
|
||||||
|
{
|
||||||
|
"name": "srv_media",
|
||||||
|
"mount": "/srv/media",
|
||||||
|
"type": "mergerfs",
|
||||||
|
"health": "healthy",
|
||||||
|
"usage_percent": 63.0,
|
||||||
|
"used_gb": 2355.2,
|
||||||
|
"total_gb": 3686.4,
|
||||||
|
"data_drives": [{ "name": "sdb", "temperature_celsius": 24.0 }],
|
||||||
|
"parity_drives": [{ "name": "sdc", "temperature_celsius": 24.0 }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"services": [
|
||||||
|
{ "name": "sshd", "status": "active", "memory_mb": 4.5, "disk_gb": 0.0 }
|
||||||
|
],
|
||||||
|
"backup": {
|
||||||
|
"status": "completed",
|
||||||
|
"last_run": 1763920000,
|
||||||
|
"next_scheduled": 1764006400,
|
||||||
|
"total_size_gb": 150.5,
|
||||||
|
"repository_health": "ok"
|
||||||
|
}
|
||||||
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
**System Panel** (Build progress in NixOS section):
|
- ✅ Agent sends structured JSON over ZMQ (no legacy support)
|
||||||
```
|
- ✅ Type-safe data access: `data.system.storage.drives[0].temperature_celsius`
|
||||||
NixOS:
|
- ✅ Complete metric coverage: CPU, memory, storage, services, backup
|
||||||
Build: 25.05.20251004.3bcc93c → Build: [████████████ ] 65%
|
- ✅ Backward compatibility via bridge conversion to existing UI widgets
|
||||||
Active users: cm, simon Active users: cm, simon
|
- ✅ All string parsing bugs eliminated
|
||||||
```
|
|
||||||
|
|
||||||
**Backup Panel** (OnGoing status with progress):
|
|
||||||
```
|
|
||||||
Latest backup: → Latest backup:
|
|
||||||
● 2024-10-23 14:32:15 ● OnGoing
|
|
||||||
└─ Duration: 1.3m └─ [██████ ] 60%
|
|
||||||
```
|
|
||||||
|
|
||||||
**Critical Configuration Hash Fix - HIGH PRIORITY:**
|
|
||||||
|
|
||||||
**Problem:** Configuration hash currently shows git commit hash instead of actual deployed system hash.
|
|
||||||
|
|
||||||
**Current (incorrect):**
|
|
||||||
- Shows git hash: `db11f82` (source repository commit)
|
|
||||||
- Not accurate - doesn't reflect what's actually deployed
|
|
||||||
|
|
||||||
**Target (correct):**
|
|
||||||
- Show nix store hash: `d8ivwiar` (first 8 chars from deployed system)
|
|
||||||
- Source: `/nix/store/d8ivwiarhwhgqzskj6q2482r58z46qjf-nixos-system-cmbox-25.05.20251004.3bcc93c`
|
|
||||||
- Pattern: Extract hash from `/nix/store/HASH-nixos-system-HOSTNAME-VERSION`
|
|
||||||
|
|
||||||
**Benefits:**
|
|
||||||
1. **Deployment Verification:** Confirms rebuild actually succeeded
|
|
||||||
2. **Accurate Status:** Shows what's truly running, not just source
|
|
||||||
3. **Rebuild Completion Detection:** Hash change = rebuild completed
|
|
||||||
4. **Rollback Tracking:** Each deployment has unique identifier
|
|
||||||
|
|
||||||
**Implementation Required:**
|
|
||||||
1. Agent extracts nix store hash from `ls -la /run/current-system`
|
|
||||||
2. Reports this as `system_config_hash` metric instead of git hash
|
|
||||||
3. Dashboard displays first 8 characters: `Config: d8ivwiar`
|
|
||||||
|
|
||||||
**Next Session Priority Tasks:**
|
|
||||||
|
|
||||||
**Remaining Features:**
|
|
||||||
1. **Fix Configuration Hash Display (CRITICAL)**:
|
|
||||||
- Use nix store hash instead of git commit hash
|
|
||||||
- Extract from `/run/current-system` -> `/nix/store/HASH-nixos-system-*`
|
|
||||||
- Enables proper rebuild completion detection
|
|
||||||
|
|
||||||
2. **Command Response Protocol**:
|
|
||||||
- Agent sends command completion/failure back to dashboard via ZMQ
|
|
||||||
- Dashboard updates UI status from ⏳ to ● when commands complete
|
|
||||||
- Clear success/failure status after timeout
|
|
||||||
|
|
||||||
3. **Backup Panel Features**:
|
|
||||||
- Implement backup trigger functionality (B key)
|
|
||||||
- Complete visual feedback for backup operations
|
|
||||||
- Add backup progress indicators
|
|
||||||
|
|
||||||
**Enhancement Tasks:**
|
|
||||||
- Add confirmation dialogs for destructive actions (stop/restart/rebuild)
|
|
||||||
- Implement command history/logging
|
|
||||||
- Add keyboard shortcuts help overlay
|
|
||||||
|
|
||||||
**Future Enhanced Navigation:**
|
|
||||||
- Add Page Up/Down for faster scrolling through long service lists
|
|
||||||
- Implement search/filter functionality for services
|
|
||||||
- Add jump-to-service shortcuts (first letter navigation)
|
|
||||||
|
|
||||||
**Future Advanced Features:**
|
|
||||||
- Service dependency visualization
|
|
||||||
- Historical service status tracking
|
|
||||||
- Real-time log viewing integration
|
|
||||||
|
|
||||||
## Core Architecture Principles - CRITICAL
|
|
||||||
|
|
||||||
### Individual Metrics Philosophy
|
|
||||||
|
|
||||||
**NEW ARCHITECTURE**: Agent collects individual metrics, dashboard composes widgets from those metrics.
|
|
||||||
|
|
||||||
### Maintenance Mode
|
### Maintenance Mode
|
||||||
|
|
||||||
**Purpose:**
|
|
||||||
|
|
||||||
- Suppress email notifications during planned maintenance or backups
|
|
||||||
- Prevents false alerts when services are intentionally stopped
|
|
||||||
|
|
||||||
**Implementation:**
|
|
||||||
|
|
||||||
- Agent checks for `/tmp/cm-maintenance` file before sending notifications
|
- Agent checks for `/tmp/cm-maintenance` file before sending notifications
|
||||||
- File presence suppresses all email notifications while continuing monitoring
|
- File presence suppresses all email notifications while continuing monitoring
|
||||||
- Dashboard continues to show real status, only notifications are blocked
|
- Dashboard continues to show real status, only notifications are blocked
|
||||||
|
|
||||||
**Usage:**
|
Usage:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Enable maintenance mode
|
# Enable maintenance mode
|
||||||
touch /tmp/cm-maintenance
|
touch /tmp/cm-maintenance
|
||||||
|
|
||||||
# Run maintenance tasks (backups, service restarts, etc.)
|
# Run maintenance tasks
|
||||||
systemctl stop service
|
systemctl stop service
|
||||||
# ... maintenance work ...
|
# ... maintenance work ...
|
||||||
systemctl start service
|
systemctl start service
|
||||||
@ -211,43 +177,172 @@ systemctl start service
|
|||||||
rm /tmp/cm-maintenance
|
rm /tmp/cm-maintenance
|
||||||
```
|
```
|
||||||
|
|
||||||
**NixOS Integration:**
|
## Development and Deployment Architecture
|
||||||
|
|
||||||
- Borgbackup script automatically creates/removes maintenance file
|
### Development Path
|
||||||
- Automatic cleanup via trap ensures maintenance mode doesn't stick
|
|
||||||
- All configuration shall be done from the NixOS config
|
|
||||||
|
|
||||||
**ARCHITECTURE ENFORCEMENT**:
|
- **Location:** `~/projects/cm-dashboard`
|
||||||
|
- **Purpose:** Development workflow only - for committing new code
|
||||||
|
- **Access:** Only for developers to commit changes
|
||||||
|
|
||||||
- **ZERO legacy code reuse** - Fresh implementation following ARCHITECT.md exactly
|
### Deployment Path
|
||||||
- **Individual metrics only** - NO grouped metric structures
|
|
||||||
- **Reference-only legacy** - Study old functionality, implement new architecture
|
|
||||||
- **Clean slate mindset** - Build as if legacy codebase never existed
|
|
||||||
|
|
||||||
**Implementation Rules**:
|
- **Location:** `/var/lib/cm-dashboard/nixos-config`
|
||||||
|
- **Purpose:** Production deployment only - agent clones/pulls from git
|
||||||
|
- **Workflow:** git pull → `/var/lib/cm-dashboard/nixos-config` → nixos-rebuild
|
||||||
|
|
||||||
1. **Individual Metrics**: Each metric is collected, transmitted, and stored individually
|
### Git Flow
|
||||||
2. **Agent Status Authority**: Agent calculates status for each metric using thresholds
|
|
||||||
3. **Dashboard Composition**: Dashboard widgets subscribe to specific metrics by name
|
|
||||||
4. **Status Aggregation**: Dashboard aggregates individual metric statuses for widget status
|
|
||||||
**Testing & Building**:
|
|
||||||
|
|
||||||
- **Workspace builds**: `cargo build --workspace` for all testing
|
```
|
||||||
- **Clean compilation**: Remove `target/` between architecture changes
|
Development: ~/projects/cm-dashboard → git commit → git push
|
||||||
- **ZMQ testing**: Test agent-dashboard communication independently
|
Deployment: git pull → /var/lib/cm-dashboard/nixos-config → rebuild
|
||||||
- **Widget testing**: Verify UI layout matches legacy appearance exactly
|
```
|
||||||
|
|
||||||
**NEVER in New Implementation**:
|
## Automated Binary Release System
|
||||||
|
|
||||||
- Copy/paste ANY code from legacy backup
|
CM Dashboard uses automated binary releases instead of source builds.
|
||||||
- Calculate status in dashboard widgets
|
|
||||||
- Hardcode metric names in widgets (use const arrays)
|
|
||||||
|
|
||||||
# Important Communication Guidelines
|
### Creating New Releases
|
||||||
|
|
||||||
NEVER write that you have "successfully implemented" something or generate extensive summary text without first verifying with the user that the implementation is correct. This wastes tokens. Keep responses concise.
|
```bash
|
||||||
|
cd ~/projects/cm-dashboard
|
||||||
|
git tag v0.1.X
|
||||||
|
git push origin v0.1.X
|
||||||
|
```
|
||||||
|
|
||||||
NEVER implement code without first getting explicit user agreement on the approach. Always ask for confirmation before proceeding with implementation.
|
This automatically:
|
||||||
|
|
||||||
|
- Builds static binaries with `RUSTFLAGS="-C target-feature=+crt-static"`
|
||||||
|
- Creates GitHub-style release with tarball
|
||||||
|
- Uploads binaries via Gitea API
|
||||||
|
|
||||||
|
### NixOS Configuration Updates
|
||||||
|
|
||||||
|
Edit `~/projects/nixosbox/hosts/services/cm-dashboard.nix`:
|
||||||
|
|
||||||
|
```nix
|
||||||
|
version = "v0.1.X";
|
||||||
|
src = pkgs.fetchurl {
|
||||||
|
url = "https://gitea.cmtec.se/cm/cm-dashboard/releases/download/${version}/cm-dashboard-linux-x86_64.tar.gz";
|
||||||
|
sha256 = "sha256-NEW_HASH_HERE";
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### Get Release Hash
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/projects/nixosbox
|
||||||
|
nix-build --no-out-link -E 'with import <nixpkgs> {}; fetchurl {
|
||||||
|
url = "https://gitea.cmtec.se/cm/cm-dashboard/releases/download/v0.1.X/cm-dashboard-linux-x86_64.tar.gz";
|
||||||
|
sha256 = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";
|
||||||
|
}' 2>&1 | grep "got:"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Building
|
||||||
|
|
||||||
|
**Testing & Building:**
|
||||||
|
|
||||||
|
- **Workspace builds**: `nix-shell -p openssl pkg-config --run "cargo build --workspace"`
|
||||||
|
- **Clean compilation**: Remove `target/` between major changes
|
||||||
|
|
||||||
|
## Enhanced Storage Pool Visualization
|
||||||
|
|
||||||
|
### Auto-Discovery Architecture
|
||||||
|
|
||||||
|
The dashboard uses automatic storage discovery to eliminate manual configuration complexity while providing intelligent storage pool grouping.
|
||||||
|
|
||||||
|
### Discovery Process
|
||||||
|
|
||||||
|
**At Agent Startup:**
|
||||||
|
|
||||||
|
1. Parse `/proc/mounts` to identify all mounted filesystems
|
||||||
|
2. Detect MergerFS pools by analyzing `fuse.mergerfs` mount sources
|
||||||
|
3. Identify member disks and potential parity relationships via heuristics
|
||||||
|
4. Store discovered storage topology for continuous monitoring
|
||||||
|
5. Generate pool-aware metrics with hierarchical relationships
|
||||||
|
|
||||||
|
**Continuous Monitoring:**
|
||||||
|
|
||||||
|
- Use stored discovery data for efficient metric collection
|
||||||
|
- Monitor individual drives for SMART data, temperature, wear
|
||||||
|
- Calculate pool-level health based on member drive status
|
||||||
|
- Generate enhanced metrics for dashboard visualization
|
||||||
|
|
||||||
|
### Supported Storage Types
|
||||||
|
|
||||||
|
**Single Disks:**
|
||||||
|
|
||||||
|
- ext4, xfs, btrfs mounted directly
|
||||||
|
- Individual drive monitoring with SMART data
|
||||||
|
- Traditional single-disk display for root, boot, etc.
|
||||||
|
|
||||||
|
**MergerFS Pools:**
|
||||||
|
|
||||||
|
- Auto-detect from `/proc/mounts` fuse.mergerfs entries
|
||||||
|
- Parse source paths to identify member disks (e.g., "/mnt/disk1:/mnt/disk2")
|
||||||
|
- Heuristic parity disk detection (sequential device names, "parity" in path)
|
||||||
|
- Pool health calculation (healthy/degraded/critical)
|
||||||
|
- Hierarchical tree display with data/parity disk grouping
|
||||||
|
|
||||||
|
**Future Extensions Ready:**
|
||||||
|
|
||||||
|
- RAID arrays via `/proc/mdstat` parsing
|
||||||
|
- ZFS pools via `zpool status` integration
|
||||||
|
- LVM logical volumes via `lvs` discovery
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[collectors.disk]
|
||||||
|
enabled = true
|
||||||
|
auto_discover = true # Default: true
|
||||||
|
# Optional exclusions for special filesystems
|
||||||
|
exclude_mount_points = ["/tmp", "/proc", "/sys", "/dev"]
|
||||||
|
exclude_fs_types = ["tmpfs", "devtmpfs", "sysfs", "proc"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Display Format
|
||||||
|
|
||||||
|
```
|
||||||
|
Network:
|
||||||
|
● eno1:
|
||||||
|
├─ ip: 192.168.30.105
|
||||||
|
└─ tailscale0: 100.125.108.16
|
||||||
|
● eno2:
|
||||||
|
└─ ip: 192.168.32.105
|
||||||
|
CPU:
|
||||||
|
● Load: 0.23 0.21 0.13
|
||||||
|
└─ Freq: 1048 MHz
|
||||||
|
RAM:
|
||||||
|
● Usage: 25% 5.8GB/23.3GB
|
||||||
|
├─ ● /tmp: 2% 0.5GB/2GB
|
||||||
|
└─ ● /var/tmp: 0% 0GB/1.0GB
|
||||||
|
Storage:
|
||||||
|
● 844B9A25 T: 25C W: 4%
|
||||||
|
├─ ● /: 55% 250.5GB/456.4GB
|
||||||
|
└─ ● /boot: 26% 0.3GB/1.0GB
|
||||||
|
● mergerfs /srv/media:
|
||||||
|
├─ ● 63% 2355.2GB/3686.4GB
|
||||||
|
├─ ● Data_1: WDZQ8H8D T: 28°C
|
||||||
|
├─ ● Data_2: GGA04461 T: 28°C
|
||||||
|
└─ ● Parity: WDZS8RY0 T: 29°C
|
||||||
|
Backup:
|
||||||
|
● Repo: 4
|
||||||
|
├─ gitea
|
||||||
|
├─ vaultwarden
|
||||||
|
├─ mysql
|
||||||
|
└─ immich
|
||||||
|
● W800639Y W: 2%
|
||||||
|
├─ ● Backup: 2025-11-29T04:00:01.324623
|
||||||
|
└─ ● Usage: 8% 70GB/916GB
|
||||||
|
● WD-WCC7K1234567 T: 32°C W: 12%
|
||||||
|
├─ ● Backup: 2025-11-29T04:00:01.324623
|
||||||
|
└─ ● Usage: 45% 678GB/1.5TB
|
||||||
|
```
|
||||||
|
|
||||||
|
## Important Communication Guidelines
|
||||||
|
|
||||||
|
Keep responses concise and focused. Avoid extensive implementation summaries unless requested.
|
||||||
|
|
||||||
## Commit Message Guidelines
|
## Commit Message Guidelines
|
||||||
|
|
||||||
@ -273,83 +368,23 @@ NEVER implement code without first getting explicit user agreement on the approa
|
|||||||
- ✅ "Restructure storage widget with improved layout"
|
- ✅ "Restructure storage widget with improved layout"
|
||||||
- ✅ "Update CPU thresholds to production values"
|
- ✅ "Update CPU thresholds to production values"
|
||||||
|
|
||||||
## Development and Deployment Architecture
|
## Implementation Rules
|
||||||
|
|
||||||
**CRITICAL:** Development and deployment paths are completely separate:
|
1. **Agent Status Authority**: Agent calculates status for each metric using thresholds
|
||||||
|
2. **Dashboard Composition**: Dashboard widgets subscribe to specific metrics by name
|
||||||
|
3. **Status Aggregation**: Dashboard aggregates individual metric statuses for widget status
|
||||||
|
|
||||||
### Development Path
|
**NEVER:**
|
||||||
- **Location:** `~/projects/nixosbox`
|
|
||||||
- **Purpose:** Development workflow only - for committing new cm-dashboard code
|
|
||||||
- **Access:** Only for developers to commit changes
|
|
||||||
- **Code Access:** Running cm-dashboard code shall NEVER access this path
|
|
||||||
|
|
||||||
### Deployment Path
|
- Copy/paste ANY code from legacy implementations
|
||||||
- **Location:** `/var/lib/cm-dashboard/nixos-config`
|
- Calculate status in dashboard widgets
|
||||||
- **Purpose:** Production deployment only - agent clones/pulls from git
|
- Hardcode metric names in widgets (use const arrays)
|
||||||
- **Access:** Only cm-dashboard agent for deployment operations
|
- Create files unless absolutely necessary for achieving goals
|
||||||
- **Workflow:** git pull → `/var/lib/cm-dashboard/nixos-config` → nixos-rebuild
|
- Create documentation files unless explicitly requested
|
||||||
|
|
||||||
### Git Flow
|
**ALWAYS:**
|
||||||
```
|
|
||||||
Development: ~/projects/nixosbox → git commit → git push
|
|
||||||
Deployment: git pull → /var/lib/cm-dashboard/nixos-config → rebuild
|
|
||||||
```
|
|
||||||
|
|
||||||
## Automated Binary Release System
|
- Prefer editing existing files to creating new ones
|
||||||
|
- Follow existing code conventions and patterns
|
||||||
**IMPLEMENTED:** cm-dashboard now uses automated binary releases instead of source builds.
|
- Use existing libraries and utilities
|
||||||
|
- Follow security best practices
|
||||||
### Release Workflow
|
|
||||||
|
|
||||||
1. **Automated Release Creation**
|
|
||||||
- Gitea Actions workflow builds static binaries on tag push
|
|
||||||
- Creates release with `cm-dashboard-linux-x86_64.tar.gz` tarball
|
|
||||||
- No manual intervention required for binary generation
|
|
||||||
|
|
||||||
2. **Creating New Releases**
|
|
||||||
```bash
|
|
||||||
cd ~/projects/cm-dashboard
|
|
||||||
git tag v0.1.X
|
|
||||||
git push origin v0.1.X
|
|
||||||
```
|
|
||||||
|
|
||||||
This automatically:
|
|
||||||
- Builds static binaries with `RUSTFLAGS="-C target-feature=+crt-static"`
|
|
||||||
- Creates GitHub-style release with tarball
|
|
||||||
- Uploads binaries via Gitea API
|
|
||||||
|
|
||||||
3. **NixOS Configuration Updates**
|
|
||||||
Edit `~/projects/nixosbox/hosts/common/cm-dashboard.nix`:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
version = "v0.1.X";
|
|
||||||
src = pkgs.fetchurl {
|
|
||||||
url = "https://gitea.cmtec.se/cm/cm-dashboard/releases/download/${version}/cm-dashboard-linux-x86_64.tar.gz";
|
|
||||||
sha256 = "sha256-NEW_HASH_HERE";
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
4. **Get Release Hash**
|
|
||||||
```bash
|
|
||||||
cd ~/projects/nixosbox
|
|
||||||
nix-build --no-out-link -E 'with import <nixpkgs> {}; fetchurl {
|
|
||||||
url = "https://gitea.cmtec.se/cm/cm-dashboard/releases/download/v0.1.X/cm-dashboard-linux-x86_64.tar.gz";
|
|
||||||
sha256 = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";
|
|
||||||
}' 2>&1 | grep "got:"
|
|
||||||
```
|
|
||||||
|
|
||||||
5. **Commit and Deploy**
|
|
||||||
```bash
|
|
||||||
cd ~/projects/nixosbox
|
|
||||||
git add hosts/common/cm-dashboard.nix
|
|
||||||
git commit -m "Update cm-dashboard to v0.1.X with static binaries"
|
|
||||||
git push
|
|
||||||
```
|
|
||||||
|
|
||||||
### Benefits
|
|
||||||
|
|
||||||
- **No compilation overhead** on each host
|
|
||||||
- **Consistent static binaries** across all hosts
|
|
||||||
- **Faster deployments** - download vs compile
|
|
||||||
- **No library dependency issues** - static linking
|
|
||||||
- **Automated pipeline** - tag push triggers everything
|
|
||||||
|
|||||||
279
Cargo.lock
generated
279
Cargo.lock
generated
@ -17,9 +17,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aho-corasick"
|
name = "aho-corasick"
|
||||||
version = "1.1.3"
|
version = "1.1.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memchr",
|
"memchr",
|
||||||
]
|
]
|
||||||
@ -71,22 +71,22 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anstyle-query"
|
name = "anstyle-query"
|
||||||
version = "1.1.4"
|
version = "1.1.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2"
|
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"windows-sys 0.60.2",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anstyle-wincon"
|
name = "anstyle-wincon"
|
||||||
version = "3.0.10"
|
version = "3.0.11"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a"
|
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anstyle",
|
"anstyle",
|
||||||
"once_cell_polyfill",
|
"once_cell_polyfill",
|
||||||
"windows-sys 0.60.2",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -95,6 +95,15 @@ version = "1.0.100"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
|
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ar_archive_writer"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a"
|
||||||
|
dependencies = [
|
||||||
|
"object",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-trait"
|
name = "async-trait"
|
||||||
version = "0.1.89"
|
version = "0.1.89"
|
||||||
@ -144,9 +153,9 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bytes"
|
name = "bytes"
|
||||||
version = "1.10.1"
|
version = "1.11.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
|
checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cassowary"
|
name = "cassowary"
|
||||||
@ -156,9 +165,9 @@ checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cc"
|
name = "cc"
|
||||||
version = "1.2.41"
|
version = "1.2.46"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7"
|
checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"find-msvc-tools",
|
"find-msvc-tools",
|
||||||
"jobserver",
|
"jobserver",
|
||||||
@ -230,9 +239,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "4.5.49"
|
version = "4.5.52"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f4512b90fa68d3a9932cea5184017c5d200f5921df706d45e853537dea51508f"
|
checksum = "aa8120877db0e5c011242f96806ce3c94e0737ab8108532a76a3300a01db2ab8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap_builder",
|
"clap_builder",
|
||||||
"clap_derive",
|
"clap_derive",
|
||||||
@ -240,9 +249,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap_builder"
|
name = "clap_builder"
|
||||||
version = "4.5.49"
|
version = "4.5.52"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0025e98baa12e766c67ba13ff4695a887a1eba19569aad00a472546795bd6730"
|
checksum = "02576b399397b659c26064fbc92a75fede9d18ffd5f80ca1cd74ddab167016e1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anstream",
|
"anstream",
|
||||||
"anstyle",
|
"anstyle",
|
||||||
@ -270,7 +279,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard"
|
name = "cm-dashboard"
|
||||||
version = "0.1.0"
|
version = "0.1.274"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"chrono",
|
"chrono",
|
||||||
@ -286,12 +295,13 @@ dependencies = [
|
|||||||
"toml",
|
"toml",
|
||||||
"tracing",
|
"tracing",
|
||||||
"tracing-subscriber",
|
"tracing-subscriber",
|
||||||
|
"wake-on-lan",
|
||||||
"zmq",
|
"zmq",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard-agent"
|
name = "cm-dashboard-agent"
|
||||||
version = "0.1.0"
|
version = "0.1.274"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
@ -299,6 +309,7 @@ dependencies = [
|
|||||||
"chrono-tz",
|
"chrono-tz",
|
||||||
"clap",
|
"clap",
|
||||||
"cm-dashboard-shared",
|
"cm-dashboard-shared",
|
||||||
|
"futures",
|
||||||
"gethostname",
|
"gethostname",
|
||||||
"lettre",
|
"lettre",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
@ -314,7 +325,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard-shared"
|
name = "cm-dashboard-shared"
|
||||||
version = "0.1.0"
|
version = "0.1.274"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"chrono",
|
"chrono",
|
||||||
"serde",
|
"serde",
|
||||||
@ -502,9 +513,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "find-msvc-tools"
|
name = "find-msvc-tools"
|
||||||
version = "0.1.4"
|
version = "0.1.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127"
|
checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fnv"
|
name = "fnv"
|
||||||
@ -542,6 +553,21 @@ dependencies = [
|
|||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures"
|
||||||
|
version = "0.3.31"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
|
||||||
|
dependencies = [
|
||||||
|
"futures-channel",
|
||||||
|
"futures-core",
|
||||||
|
"futures-executor",
|
||||||
|
"futures-io",
|
||||||
|
"futures-sink",
|
||||||
|
"futures-task",
|
||||||
|
"futures-util",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-channel"
|
name = "futures-channel"
|
||||||
version = "0.3.31"
|
version = "0.3.31"
|
||||||
@ -549,6 +575,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
|
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-core",
|
"futures-core",
|
||||||
|
"futures-sink",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -557,12 +584,34 @@ version = "0.3.31"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
|
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-executor"
|
||||||
|
version = "0.3.31"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
"futures-task",
|
||||||
|
"futures-util",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-io"
|
name = "futures-io"
|
||||||
version = "0.3.31"
|
version = "0.3.31"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
|
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-macro"
|
||||||
|
version = "0.3.31"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-sink"
|
name = "futures-sink"
|
||||||
version = "0.3.31"
|
version = "0.3.31"
|
||||||
@ -581,8 +630,11 @@ version = "0.3.31"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
|
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-io",
|
"futures-io",
|
||||||
|
"futures-macro",
|
||||||
|
"futures-sink",
|
||||||
"futures-task",
|
"futures-task",
|
||||||
"memchr",
|
"memchr",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
@ -767,9 +819,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "icu_collections"
|
name = "icu_collections"
|
||||||
version = "2.0.0"
|
version = "2.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47"
|
checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"displaydoc",
|
"displaydoc",
|
||||||
"potential_utf",
|
"potential_utf",
|
||||||
@ -780,9 +832,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "icu_locale_core"
|
name = "icu_locale_core"
|
||||||
version = "2.0.0"
|
version = "2.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a"
|
checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"displaydoc",
|
"displaydoc",
|
||||||
"litemap",
|
"litemap",
|
||||||
@ -793,11 +845,10 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "icu_normalizer"
|
name = "icu_normalizer"
|
||||||
version = "2.0.0"
|
version = "2.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979"
|
checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"displaydoc",
|
|
||||||
"icu_collections",
|
"icu_collections",
|
||||||
"icu_normalizer_data",
|
"icu_normalizer_data",
|
||||||
"icu_properties",
|
"icu_properties",
|
||||||
@ -808,42 +859,38 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "icu_normalizer_data"
|
name = "icu_normalizer_data"
|
||||||
version = "2.0.0"
|
version = "2.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3"
|
checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "icu_properties"
|
name = "icu_properties"
|
||||||
version = "2.0.1"
|
version = "2.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b"
|
checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"displaydoc",
|
|
||||||
"icu_collections",
|
"icu_collections",
|
||||||
"icu_locale_core",
|
"icu_locale_core",
|
||||||
"icu_properties_data",
|
"icu_properties_data",
|
||||||
"icu_provider",
|
"icu_provider",
|
||||||
"potential_utf",
|
|
||||||
"zerotrie",
|
"zerotrie",
|
||||||
"zerovec",
|
"zerovec",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "icu_properties_data"
|
name = "icu_properties_data"
|
||||||
version = "2.0.1"
|
version = "2.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632"
|
checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "icu_provider"
|
name = "icu_provider"
|
||||||
version = "2.0.0"
|
version = "2.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af"
|
checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"displaydoc",
|
"displaydoc",
|
||||||
"icu_locale_core",
|
"icu_locale_core",
|
||||||
"stable_deref_trait",
|
|
||||||
"tinystr",
|
|
||||||
"writeable",
|
"writeable",
|
||||||
"yoke",
|
"yoke",
|
||||||
"zerofrom",
|
"zerofrom",
|
||||||
@ -884,9 +931,12 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indoc"
|
name = "indoc"
|
||||||
version = "2.0.6"
|
version = "2.0.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
|
checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
|
||||||
|
dependencies = [
|
||||||
|
"rustversion",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ipnet"
|
name = "ipnet"
|
||||||
@ -896,9 +946,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "is_terminal_polyfill"
|
name = "is_terminal_polyfill"
|
||||||
version = "1.70.1"
|
version = "1.70.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itertools"
|
name = "itertools"
|
||||||
@ -927,9 +977,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "js-sys"
|
name = "js-sys"
|
||||||
version = "0.3.81"
|
version = "0.3.82"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305"
|
checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
@ -987,9 +1037,9 @@ checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "litemap"
|
name = "litemap"
|
||||||
version = "0.8.0"
|
version = "0.8.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
|
checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lock_api"
|
name = "lock_api"
|
||||||
@ -1103,6 +1153,15 @@ dependencies = [
|
|||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "object"
|
||||||
|
version = "0.32.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "once_cell"
|
name = "once_cell"
|
||||||
version = "1.21.3"
|
version = "1.21.3"
|
||||||
@ -1111,15 +1170,15 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "once_cell_polyfill"
|
name = "once_cell_polyfill"
|
||||||
version = "1.70.1"
|
version = "1.70.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
|
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openssl"
|
name = "openssl"
|
||||||
version = "0.10.74"
|
version = "0.10.75"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "24ad14dd45412269e1a30f52ad8f0664f0f4f4a89ee8fe28c3b3527021ebb654"
|
checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.10.0",
|
"bitflags 2.10.0",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
@ -1149,9 +1208,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openssl-sys"
|
name = "openssl-sys"
|
||||||
version = "0.9.110"
|
version = "0.9.111"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0a9f0075ba3c21b09f8e8b2026584b1d18d49388648f2fbbf3c97ea8deced8e2"
|
checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
"libc",
|
"libc",
|
||||||
@ -1261,36 +1320,37 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "potential_utf"
|
name = "potential_utf"
|
||||||
version = "0.1.3"
|
version = "0.1.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a"
|
checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"zerovec",
|
"zerovec",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.101"
|
version = "1.0.103"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
|
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "psm"
|
name = "psm"
|
||||||
version = "0.1.27"
|
version = "0.1.28"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e66fcd288453b748497d8fb18bccc83a16b0518e3906d4b8df0a8d42d93dbb1c"
|
checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"ar_archive_writer",
|
||||||
"cc",
|
"cc",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quote"
|
name = "quote"
|
||||||
version = "1.0.41"
|
version = "1.0.42"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1"
|
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
]
|
]
|
||||||
@ -1610,9 +1670,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "signal-hook-mio"
|
name = "signal-hook-mio"
|
||||||
version = "0.2.4"
|
version = "0.2.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd"
|
checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
"mio 0.8.11",
|
"mio 0.8.11",
|
||||||
@ -1715,9 +1775,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "2.0.107"
|
version = "2.0.110"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b"
|
checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
@ -1825,9 +1885,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tinystr"
|
name = "tinystr"
|
||||||
version = "0.8.1"
|
version = "0.8.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b"
|
checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"displaydoc",
|
"displaydoc",
|
||||||
"zerovec",
|
"zerovec",
|
||||||
@ -1873,9 +1933,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio-util"
|
name = "tokio-util"
|
||||||
version = "0.7.16"
|
version = "0.7.17"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5"
|
checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
@ -2000,9 +2060,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-ident"
|
name = "unicode-ident"
|
||||||
version = "1.0.19"
|
version = "1.0.22"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
|
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-segmentation"
|
name = "unicode-segmentation"
|
||||||
@ -2054,9 +2114,9 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "version-compare"
|
name = "version-compare"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b"
|
checksum = "03c2856837ef78f57382f06b2b8563a2f512f7185d732608fd9176cb3b8edf0e"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "version_check"
|
name = "version_check"
|
||||||
@ -2064,6 +2124,12 @@ version = "0.9.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wake-on-lan"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1ccf60b60ad7e5b1b37372c5134cbcab4db0706c231d212e0c643a077462bc8f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "walkdir"
|
name = "walkdir"
|
||||||
version = "2.5.0"
|
version = "2.5.0"
|
||||||
@ -2100,9 +2166,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-bindgen"
|
name = "wasm-bindgen"
|
||||||
version = "0.2.104"
|
version = "0.2.105"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d"
|
checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
@ -2111,25 +2177,11 @@ dependencies = [
|
|||||||
"wasm-bindgen-shared",
|
"wasm-bindgen-shared",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "wasm-bindgen-backend"
|
|
||||||
version = "0.2.104"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19"
|
|
||||||
dependencies = [
|
|
||||||
"bumpalo",
|
|
||||||
"log",
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
"wasm-bindgen-shared",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-bindgen-futures"
|
name = "wasm-bindgen-futures"
|
||||||
version = "0.4.54"
|
version = "0.4.55"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c"
|
checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"js-sys",
|
"js-sys",
|
||||||
@ -2140,9 +2192,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-bindgen-macro"
|
name = "wasm-bindgen-macro"
|
||||||
version = "0.2.104"
|
version = "0.2.105"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119"
|
checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"quote",
|
"quote",
|
||||||
"wasm-bindgen-macro-support",
|
"wasm-bindgen-macro-support",
|
||||||
@ -2150,31 +2202,31 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-bindgen-macro-support"
|
name = "wasm-bindgen-macro-support"
|
||||||
version = "0.2.104"
|
version = "0.2.105"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7"
|
checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"bumpalo",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn",
|
||||||
"wasm-bindgen-backend",
|
|
||||||
"wasm-bindgen-shared",
|
"wasm-bindgen-shared",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-bindgen-shared"
|
name = "wasm-bindgen-shared"
|
||||||
version = "0.2.104"
|
version = "0.2.105"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1"
|
checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "web-sys"
|
name = "web-sys"
|
||||||
version = "0.3.81"
|
version = "0.3.82"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120"
|
checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"js-sys",
|
"js-sys",
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
@ -2528,17 +2580,16 @@ checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "writeable"
|
name = "writeable"
|
||||||
version = "0.6.1"
|
version = "0.6.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb"
|
checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "yoke"
|
name = "yoke"
|
||||||
version = "0.8.0"
|
version = "0.8.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc"
|
checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"serde",
|
|
||||||
"stable_deref_trait",
|
"stable_deref_trait",
|
||||||
"yoke-derive",
|
"yoke-derive",
|
||||||
"zerofrom",
|
"zerofrom",
|
||||||
@ -2546,9 +2597,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "yoke-derive"
|
name = "yoke-derive"
|
||||||
version = "0.8.0"
|
version = "0.8.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
|
checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
@ -2609,9 +2660,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zerotrie"
|
name = "zerotrie"
|
||||||
version = "0.2.2"
|
version = "0.2.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595"
|
checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"displaydoc",
|
"displaydoc",
|
||||||
"yoke",
|
"yoke",
|
||||||
@ -2620,9 +2671,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zerovec"
|
name = "zerovec"
|
||||||
version = "0.11.4"
|
version = "0.11.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b"
|
checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"yoke",
|
"yoke",
|
||||||
"zerofrom",
|
"zerofrom",
|
||||||
@ -2631,9 +2682,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zerovec-derive"
|
name = "zerovec-derive"
|
||||||
version = "0.11.1"
|
version = "0.11.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
|
checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
|
|||||||
515
README.md
515
README.md
@ -1,88 +1,108 @@
|
|||||||
# CM Dashboard
|
# CM Dashboard
|
||||||
|
|
||||||
A real-time infrastructure monitoring system with intelligent status aggregation and email notifications, built with Rust and ZMQ.
|
A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure. Built with ZMQ-based metric collection and individual metrics architecture.
|
||||||
|
|
||||||
## Current Implementation
|
## Features
|
||||||
|
|
||||||
This is a complete rewrite implementing an **individual metrics architecture** where:
|
### Core Monitoring
|
||||||
|
- **Real-time metrics**: CPU, RAM, Storage, and Service status
|
||||||
|
- **Multi-host support**: Monitor multiple servers from single dashboard
|
||||||
|
- **Service management**: Start/stop services with intelligent status tracking
|
||||||
|
- **NixOS integration**: System rebuild via SSH + tmux popup
|
||||||
|
- **Backup monitoring**: Borgbackup status and scheduling
|
||||||
|
- **Email notifications**: Intelligent batching prevents spam
|
||||||
|
|
||||||
- **Agent** collects individual metrics (e.g., `cpu_load_1min`, `memory_usage_percent`) and calculates status
|
### User-Stopped Service Tracking
|
||||||
- **Dashboard** subscribes to specific metrics and composes widgets
|
Services stopped via the dashboard are intelligently tracked to prevent false alerts:
|
||||||
- **Status Aggregation** provides intelligent email notifications with batching
|
|
||||||
- **Persistent Cache** prevents false notifications on restart
|
|
||||||
|
|
||||||
## Dashboard Interface
|
- **Smart status reporting**: User-stopped services show as Status::OK instead of Warning
|
||||||
|
- **Persistent storage**: Tracking survives agent restarts via JSON storage
|
||||||
|
- **Automatic management**: Flags cleared when services restarted via dashboard
|
||||||
|
- **Maintenance friendly**: No false alerts during intentional service operations
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Individual Metrics Philosophy
|
||||||
|
- **Agent**: Collects individual metrics, calculates status using thresholds
|
||||||
|
- **Dashboard**: Subscribes to specific metrics, composes widgets from individual data
|
||||||
|
- **ZMQ Communication**: Efficient real-time metric transmission
|
||||||
|
- **Status Aggregation**: Host-level status calculated from all service metrics
|
||||||
|
|
||||||
|
### Components
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────┐ ZMQ ┌─────────────────┐
|
||||||
|
│ │◄──────────►│ │
|
||||||
|
│ Agent │ Metrics │ Dashboard │
|
||||||
|
│ - Collectors │ │ - TUI │
|
||||||
|
│ - Status │ │ - Widgets │
|
||||||
|
│ - Tracking │ │ - Commands │
|
||||||
|
│ │ │ │
|
||||||
|
└─────────────────┘ └─────────────────┘
|
||||||
|
│ │
|
||||||
|
▼ ▼
|
||||||
|
┌─────────────────┐ ┌─────────────────┐
|
||||||
|
│ JSON Storage │ │ SSH + tmux │
|
||||||
|
│ - User-stopped │ │ - Remote rebuild│
|
||||||
|
│ - Cache │ │ - Process │
|
||||||
|
│ - State │ │ isolation │
|
||||||
|
└─────────────────┘ └─────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Service Control Flow
|
||||||
|
|
||||||
|
1. **User Action**: Dashboard sends `UserStart`/`UserStop` commands
|
||||||
|
2. **Agent Processing**:
|
||||||
|
- Marks service as user-stopped (if stopping)
|
||||||
|
- Executes `systemctl start/stop service`
|
||||||
|
- Syncs state to global tracker
|
||||||
|
3. **Status Calculation**:
|
||||||
|
- Systemd collector checks user-stopped flag
|
||||||
|
- Reports Status::OK for user-stopped inactive services
|
||||||
|
- Normal Warning status for system failures
|
||||||
|
|
||||||
|
## Interface
|
||||||
|
|
||||||
```
|
```
|
||||||
cm-dashboard • ● cmbox ● srv01 ● srv02 ● steambox
|
cm-dashboard • ● cmbox ● srv01 ● srv02 ● steambox
|
||||||
┌system──────────────────────────────┐┌services─────────────────────────────────────────┐
|
┌system──────────────────────────────┐┌services─────────────────────────────────────────┐
|
||||||
│CPU: ││Service: Status: RAM: Disk: │
|
│NixOS: ││Service: Status: RAM: Disk: │
|
||||||
│● Load: 0.10 0.52 0.88 • 400.0 MHz ││● docker active 27M 496MB │
|
│Build: 25.05.20251004.3bcc93c ││● docker active 27M 496MB │
|
||||||
│RAM: ││● docker-registry active 19M 496MB │
|
│Agent: v0.1.43 ││● gitea active 579M 2.6GB │
|
||||||
│● Used: 30% 2.3GB/7.6GB ││● gitea active 579M 2.6GB │
|
│Active users: cm, simon ││● nginx active 28M 24MB │
|
||||||
│● tmp: 0.0% 0B/2.0GB ││● gitea-runner-default active 11M 2.6GB │
|
│CPU: ││ ├─ ● gitea.cmtec.se 51ms │
|
||||||
│Disk nvme0n1: ││● haasp-core active 9M 1MB │
|
│● Load: 0.10 0.52 0.88 • 3000MHz ││ ├─ ● photos.cmtec.se 41ms │
|
||||||
│● Health: PASSED ││● haasp-mqtt active 3M 1MB │
|
│RAM: ││● postgresql active 112M 357MB │
|
||||||
│● Usage @root: 8.3% • 75.4/906.2 GB ││● haasp-webgrid active 10M 1MB │
|
│● Usage: 33% 2.6GB/7.6GB ││● redis-immich user-stopped │
|
||||||
│● Usage @boot: 5.9% • 0.1/1.0 GB ││● immich-server active 240M 45.1GB │
|
│● /tmp: 0% 0B/2.0GB ││● sshd active 2M 0 │
|
||||||
│ ││● mosquitto active 1M 1MB │
|
│Storage: ││● unifi active 594M 495MB │
|
||||||
│ ││● mysql active 38M 225MB │
|
│● root (Single): ││ │
|
||||||
│ ││● nginx active 28M 24MB │
|
│ ├─ ● nvme0n1 W: 1% ││ │
|
||||||
│ ││ ├─ ● gitea.cmtec.se 51ms │
|
│ └─ ● 18% 167.4GB/928.2GB ││ │
|
||||||
│ ││ ├─ ● haasp.cmtec.se 43ms │
|
|
||||||
│ ││ ├─ ● haasp.net 43ms │
|
|
||||||
│ ││ ├─ ● pages.cmtec.se 45ms │
|
|
||||||
└────────────────────────────────────┘│ ├─ ● photos.cmtec.se 41ms │
|
|
||||||
┌backup──────────────────────────────┐│ ├─ ● unifi.cmtec.se 46ms │
|
|
||||||
│Latest backup: ││ ├─ ● vault.cmtec.se 47ms │
|
|
||||||
│● Status: OK ││ ├─ ● www.kryddorten.se 81ms │
|
|
||||||
│Duration: 54s • Last: 4h ago ││ ├─ ● www.mariehall2.se 86ms │
|
|
||||||
│Disk usage: 48.2GB/915.8GB ││● postgresql active 112M 357MB │
|
|
||||||
│P/N: Samsung SSD 870 QVO 1TB ││● redis-immich active 8M 45.1GB │
|
|
||||||
│S/N: S5RRNF0W800639Y ││● sshd active 2M 0 │
|
|
||||||
│● gitea 2 archives 2.7GB ││● unifi active 594M 495MB │
|
|
||||||
│● immich 2 archives 45.0GB ││● vaultwarden active 12M 1MB │
|
|
||||||
│● kryddorten 2 archives 67.6MB ││ │
|
|
||||||
│● mariehall2 2 archives 321.8MB ││ │
|
|
||||||
│● nixosbox 2 archives 4.5MB ││ │
|
|
||||||
│● unifi 2 archives 2.9MB ││ │
|
|
||||||
│● vaultwarden 2 archives 305kB ││ │
|
|
||||||
└────────────────────────────────────┘└─────────────────────────────────────────────────┘
|
└────────────────────────────────────┘└─────────────────────────────────────────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
**Navigation**: `←→` switch hosts, `r` refresh, `q` quit
|
### Navigation
|
||||||
|
- **Tab**: Switch between hosts
|
||||||
|
- **↑↓ or j/k**: Navigate services
|
||||||
|
- **s**: Start selected service (UserStart)
|
||||||
|
- **S**: Stop selected service (UserStop)
|
||||||
|
- **J**: Show service logs (journalctl in tmux popup)
|
||||||
|
- **L**: Show custom log files (tail -f custom paths in tmux popup)
|
||||||
|
- **R**: Rebuild current host
|
||||||
|
- **B**: Run backup on current host
|
||||||
|
- **q**: Quit
|
||||||
|
|
||||||
## Features
|
### Status Indicators
|
||||||
|
- **Green ●**: Active service
|
||||||
- **Real-time monitoring** - Dashboard updates every 1-2 seconds
|
- **Yellow ◐**: Inactive service (system issue)
|
||||||
- **Individual metric collection** - Granular data for flexible dashboard composition
|
- **Red ◯**: Failed service
|
||||||
- **Intelligent status aggregation** - Host-level status calculated from all services
|
- **Blue arrows**: Service transitioning (↑ starting, ↓ stopping, ↻ restarting)
|
||||||
- **Smart email notifications** - Batched, detailed alerts with service groupings
|
- **"user-stopped"**: Service stopped via dashboard (Status::OK)
|
||||||
- **Persistent state** - Prevents false notifications on restarts
|
|
||||||
- **ZMQ communication** - Efficient agent-to-dashboard messaging
|
|
||||||
- **Clean TUI** - Terminal-based dashboard with color-coded status indicators
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
### Core Components
|
|
||||||
|
|
||||||
- **Agent** (`cm-dashboard-agent`) - Collects metrics and sends via ZMQ
|
|
||||||
- **Dashboard** (`cm-dashboard`) - Real-time TUI display consuming metrics
|
|
||||||
- **Shared** (`cm-dashboard-shared`) - Common types and protocol
|
|
||||||
- **Status Aggregation** - Intelligent batching and notification management
|
|
||||||
- **Persistent Cache** - Maintains state across restarts
|
|
||||||
|
|
||||||
### Status Levels
|
|
||||||
|
|
||||||
- **🟢 Ok** - Service running normally
|
|
||||||
- **🔵 Pending** - Service starting/stopping/reloading
|
|
||||||
- **🟡 Warning** - Service issues (high load, memory, disk usage)
|
|
||||||
- **🔴 Critical** - Service failed or critical thresholds exceeded
|
|
||||||
- **❓ Unknown** - Service state cannot be determined
|
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
### Build
|
### Building
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# With Nix (recommended)
|
# With Nix (recommended)
|
||||||
@ -93,21 +113,20 @@ sudo apt install libssl-dev pkg-config # Ubuntu/Debian
|
|||||||
cargo build --workspace
|
cargo build --workspace
|
||||||
```
|
```
|
||||||
|
|
||||||
### Run
|
### Running
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Start agent (requires configuration file)
|
# Start agent (requires configuration)
|
||||||
./target/debug/cm-dashboard-agent --config /etc/cm-dashboard/agent.toml
|
./target/debug/cm-dashboard-agent --config /etc/cm-dashboard/agent.toml
|
||||||
|
|
||||||
# Start dashboard
|
# Start dashboard (inside tmux session)
|
||||||
./target/debug/cm-dashboard --config /path/to/dashboard.toml
|
tmux
|
||||||
|
./target/debug/cm-dashboard --config /etc/cm-dashboard/dashboard.toml
|
||||||
```
|
```
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
### Agent Configuration (`agent.toml`)
|
### Agent Configuration
|
||||||
|
|
||||||
The agent requires a comprehensive TOML configuration file:
|
|
||||||
|
|
||||||
```toml
|
```toml
|
||||||
collection_interval_seconds = 2
|
collection_interval_seconds = 2
|
||||||
@ -116,47 +135,27 @@ collection_interval_seconds = 2
|
|||||||
publisher_port = 6130
|
publisher_port = 6130
|
||||||
command_port = 6131
|
command_port = 6131
|
||||||
bind_address = "0.0.0.0"
|
bind_address = "0.0.0.0"
|
||||||
timeout_ms = 5000
|
transmission_interval_seconds = 2
|
||||||
heartbeat_interval_ms = 30000
|
|
||||||
|
|
||||||
[collectors.cpu]
|
[collectors.cpu]
|
||||||
enabled = true
|
enabled = true
|
||||||
interval_seconds = 2
|
interval_seconds = 2
|
||||||
load_warning_threshold = 9.0
|
load_warning_threshold = 5.0
|
||||||
load_critical_threshold = 10.0
|
load_critical_threshold = 10.0
|
||||||
temperature_warning_threshold = 100.0
|
|
||||||
temperature_critical_threshold = 110.0
|
|
||||||
|
|
||||||
[collectors.memory]
|
[collectors.memory]
|
||||||
enabled = true
|
enabled = true
|
||||||
interval_seconds = 2
|
interval_seconds = 2
|
||||||
usage_warning_percent = 80.0
|
usage_warning_percent = 80.0
|
||||||
usage_critical_percent = 95.0
|
|
||||||
|
|
||||||
[collectors.disk]
|
|
||||||
enabled = true
|
|
||||||
interval_seconds = 300
|
|
||||||
usage_warning_percent = 80.0
|
|
||||||
usage_critical_percent = 90.0
|
usage_critical_percent = 90.0
|
||||||
|
|
||||||
[[collectors.disk.filesystems]]
|
|
||||||
name = "root"
|
|
||||||
uuid = "4cade5ce-85a5-4a03-83c8-dfd1d3888d79"
|
|
||||||
mount_point = "/"
|
|
||||||
fs_type = "ext4"
|
|
||||||
monitor = true
|
|
||||||
|
|
||||||
[collectors.systemd]
|
[collectors.systemd]
|
||||||
enabled = true
|
enabled = true
|
||||||
interval_seconds = 10
|
interval_seconds = 10
|
||||||
memory_warning_mb = 1000.0
|
service_name_filters = ["nginx*", "postgresql*", "docker*", "sshd*"]
|
||||||
memory_critical_mb = 2000.0
|
excluded_services = ["nginx-config-reload", "systemd-", "getty@"]
|
||||||
service_name_filters = [
|
nginx_latency_critical_ms = 1000.0
|
||||||
"nginx", "postgresql", "redis", "docker", "sshd"
|
http_timeout_seconds = 10
|
||||||
]
|
|
||||||
excluded_services = [
|
|
||||||
"nginx-config-reload", "sshd-keygen"
|
|
||||||
]
|
|
||||||
|
|
||||||
[notifications]
|
[notifications]
|
||||||
enabled = true
|
enabled = true
|
||||||
@ -164,251 +163,203 @@ smtp_host = "localhost"
|
|||||||
smtp_port = 25
|
smtp_port = 25
|
||||||
from_email = "{hostname}@example.com"
|
from_email = "{hostname}@example.com"
|
||||||
to_email = "admin@example.com"
|
to_email = "admin@example.com"
|
||||||
rate_limit_minutes = 0
|
aggregation_interval_seconds = 30
|
||||||
trigger_on_warnings = true
|
|
||||||
trigger_on_failures = true
|
|
||||||
recovery_requires_all_ok = true
|
|
||||||
suppress_individual_recoveries = true
|
|
||||||
|
|
||||||
[status_aggregation]
|
|
||||||
enabled = true
|
|
||||||
aggregation_method = "worst_case"
|
|
||||||
notification_interval_seconds = 30
|
|
||||||
|
|
||||||
[cache]
|
|
||||||
persist_path = "/var/lib/cm-dashboard/cache.json"
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Dashboard Configuration (`dashboard.toml`)
|
### Dashboard Configuration
|
||||||
|
|
||||||
```toml
|
```toml
|
||||||
[zmq]
|
[zmq]
|
||||||
hosts = [
|
subscriber_ports = [6130]
|
||||||
{ name = "server1", address = "192.168.1.100", port = 6130 },
|
|
||||||
{ name = "server2", address = "192.168.1.101", port = 6130 }
|
|
||||||
]
|
|
||||||
connection_timeout_ms = 5000
|
|
||||||
reconnect_interval_ms = 10000
|
|
||||||
|
|
||||||
[ui]
|
[hosts]
|
||||||
refresh_interval_ms = 1000
|
predefined_hosts = ["cmbox", "srv01", "srv02"]
|
||||||
theme = "dark"
|
|
||||||
|
[ssh]
|
||||||
|
rebuild_user = "cm"
|
||||||
|
rebuild_alias = "nixos-rebuild-cmtec"
|
||||||
|
backup_alias = "cm-backup-run"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Collectors
|
## Technical Implementation
|
||||||
|
|
||||||
The agent implements several specialized collectors:
|
### Collectors
|
||||||
|
|
||||||
### CPU Collector (`cpu.rs`)
|
#### Systemd Collector
|
||||||
|
- **Service Discovery**: Uses `systemctl list-unit-files` + `list-units --all`
|
||||||
|
- **Status Calculation**: Checks user-stopped flag before assigning Warning status
|
||||||
|
- **Memory Tracking**: Per-service memory usage via `systemctl show`
|
||||||
|
- **Sub-services**: Nginx site latency, Docker containers
|
||||||
|
- **User-stopped Integration**: `UserStoppedServiceTracker::is_service_user_stopped()`
|
||||||
|
|
||||||
- Load average (1, 5, 15 minute)
|
#### User-Stopped Service Tracker
|
||||||
- CPU temperature monitoring
|
- **Storage**: `/var/lib/cm-dashboard/user-stopped-services.json`
|
||||||
- Real-time process monitoring (top CPU consumers)
|
- **Thread Safety**: Global singleton with `Arc<Mutex<>>`
|
||||||
- Status calculation with configurable thresholds
|
- **Persistence**: Automatic save on state changes
|
||||||
|
- **Global Access**: Static methods for collector integration
|
||||||
|
|
||||||
### Memory Collector (`memory.rs`)
|
#### Other Collectors
|
||||||
|
- **CPU**: Load average, temperature, frequency monitoring
|
||||||
|
- **Memory**: RAM/swap usage, tmpfs monitoring
|
||||||
|
- **Disk**: Filesystem usage, SMART health data
|
||||||
|
- **NixOS**: Build version, active users, agent version
|
||||||
|
- **Backup**: Borgbackup repository status and metrics
|
||||||
|
|
||||||
- RAM usage (total, used, available)
|
### ZMQ Protocol
|
||||||
- Swap monitoring
|
|
||||||
- Real-time process monitoring (top RAM consumers)
|
|
||||||
- Memory pressure detection
|
|
||||||
|
|
||||||
### Disk Collector (`disk.rs`)
|
```rust
|
||||||
|
// Metric Message
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub struct MetricMessage {
|
||||||
|
pub hostname: String,
|
||||||
|
pub timestamp: u64,
|
||||||
|
pub metrics: Vec<Metric>,
|
||||||
|
}
|
||||||
|
|
||||||
- Filesystem usage per mount point
|
// Service Commands
|
||||||
- SMART health monitoring
|
pub enum AgentCommand {
|
||||||
- Temperature and wear tracking
|
ServiceControl {
|
||||||
- Configurable filesystem monitoring
|
service_name: String,
|
||||||
|
action: ServiceAction,
|
||||||
|
},
|
||||||
|
SystemRebuild { /* SSH config */ },
|
||||||
|
CollectNow,
|
||||||
|
}
|
||||||
|
|
||||||
### Systemd Collector (`systemd.rs`)
|
pub enum ServiceAction {
|
||||||
|
Start, // System-initiated
|
||||||
|
Stop, // System-initiated
|
||||||
|
UserStart, // User via dashboard (clears user-stopped)
|
||||||
|
UserStop, // User via dashboard (marks user-stopped)
|
||||||
|
Status,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
- Service status monitoring (`active`, `inactive`, `failed`)
|
### Maintenance Mode
|
||||||
- Memory usage per service
|
|
||||||
- Service filtering and exclusions
|
|
||||||
- Handles transitional states (`Status::Pending`)
|
|
||||||
|
|
||||||
### Backup Collector (`backup.rs`)
|
Suppress notifications during planned maintenance:
|
||||||
|
|
||||||
- Reads TOML status files from backup systems
|
```bash
|
||||||
- Archive age verification
|
# Enable maintenance mode
|
||||||
- Disk usage tracking
|
touch /tmp/cm-maintenance
|
||||||
- Repository health monitoring
|
|
||||||
|
# Perform maintenance
|
||||||
|
systemctl stop service
|
||||||
|
# ... work ...
|
||||||
|
systemctl start service
|
||||||
|
|
||||||
|
# Disable maintenance mode
|
||||||
|
rm /tmp/cm-maintenance
|
||||||
|
```
|
||||||
|
|
||||||
## Email Notifications
|
## Email Notifications
|
||||||
|
|
||||||
### Intelligent Batching
|
### Intelligent Batching
|
||||||
|
- **Real-time dashboard**: Immediate status updates
|
||||||
|
- **Batched emails**: Aggregated every 30 seconds
|
||||||
|
- **Smart grouping**: Services organized by severity
|
||||||
|
- **Recovery suppression**: Reduces notification spam
|
||||||
|
|
||||||
The system implements smart notification batching to prevent email spam:
|
### Example Alert
|
||||||
|
|
||||||
- **Real-time dashboard updates** - Status changes appear immediately
|
|
||||||
- **Batched email notifications** - Aggregated every 30 seconds
|
|
||||||
- **Detailed groupings** - Services organized by severity
|
|
||||||
|
|
||||||
### Example Alert Email
|
|
||||||
|
|
||||||
```
|
```
|
||||||
Subject: Status Alert: 2 critical, 1 warning, 15 started
|
Subject: Status Alert: 1 critical, 2 warnings, 0 recoveries
|
||||||
|
|
||||||
Status Summary (30s duration)
|
Status Summary (30s duration)
|
||||||
Host Status: Ok → Warning
|
Host Status: Ok → Warning
|
||||||
|
|
||||||
🔴 CRITICAL ISSUES (2):
|
🔴 CRITICAL ISSUES (1):
|
||||||
postgresql: Ok → Critical
|
postgresql: Ok → Critical (memory usage 95%)
|
||||||
nginx: Warning → Critical
|
|
||||||
|
|
||||||
🟡 WARNINGS (1):
|
🟡 WARNINGS (2):
|
||||||
redis: Ok → Warning (memory usage 85%)
|
nginx: Ok → Warning (high load 8.5)
|
||||||
|
redis: user-stopped → Warning (restarted by system)
|
||||||
|
|
||||||
✅ RECOVERIES (0):
|
✅ RECOVERIES (0):
|
||||||
|
|
||||||
🟢 SERVICE STARTUPS (15):
|
|
||||||
docker: Unknown → Ok
|
|
||||||
sshd: Unknown → Ok
|
|
||||||
...
|
|
||||||
|
|
||||||
--
|
--
|
||||||
CM Dashboard Agent
|
CM Dashboard Agent v0.1.43
|
||||||
Generated at 2025-10-21 19:42:42 CET
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Individual Metrics Architecture
|
|
||||||
|
|
||||||
The system follows a **metrics-first architecture**:
|
|
||||||
|
|
||||||
### Agent Side
|
|
||||||
|
|
||||||
```rust
|
|
||||||
// Agent collects individual metrics
|
|
||||||
vec![
|
|
||||||
Metric::new("cpu_load_1min".to_string(), MetricValue::Float(2.5), Status::Ok),
|
|
||||||
Metric::new("memory_usage_percent".to_string(), MetricValue::Float(78.5), Status::Warning),
|
|
||||||
Metric::new("service_nginx_status".to_string(), MetricValue::String("active".to_string()), Status::Ok),
|
|
||||||
]
|
|
||||||
```
|
|
||||||
|
|
||||||
### Dashboard Side
|
|
||||||
|
|
||||||
```rust
|
|
||||||
// Widgets subscribe to specific metrics
|
|
||||||
impl Widget for CpuWidget {
|
|
||||||
fn update_from_metrics(&mut self, metrics: &[&Metric]) {
|
|
||||||
for metric in metrics {
|
|
||||||
match metric.name.as_str() {
|
|
||||||
"cpu_load_1min" => self.load_1min = metric.value.as_f32(),
|
|
||||||
"cpu_load_5min" => self.load_5min = metric.value.as_f32(),
|
|
||||||
"cpu_temperature_celsius" => self.temperature = metric.value.as_f32(),
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Persistent Cache
|
|
||||||
|
|
||||||
The cache system prevents false notifications:
|
|
||||||
|
|
||||||
- **Automatic saving** - Saves when service status changes
|
|
||||||
- **Persistent storage** - Maintains state across agent restarts
|
|
||||||
- **Simple design** - No complex TTL or cleanup logic
|
|
||||||
- **Status preservation** - Prevents duplicate notifications
|
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
### Project Structure
|
### Project Structure
|
||||||
|
|
||||||
```
|
```
|
||||||
cm-dashboard/
|
cm-dashboard/
|
||||||
├── agent/ # Metrics collection agent
|
├── agent/ # Metrics collection agent
|
||||||
│ ├── src/
|
│ ├── src/
|
||||||
│ │ ├── collectors/ # CPU, memory, disk, systemd, backup
|
│ │ ├── collectors/ # CPU, memory, disk, systemd, backup, nixos
|
||||||
│ │ ├── status/ # Status aggregation and notifications
|
│ │ ├── service_tracker.rs # User-stopped service tracking
|
||||||
│ │ ├── cache/ # Persistent metric caching
|
│ │ ├── status/ # Status aggregation and notifications
|
||||||
│ │ ├── config/ # TOML configuration loading
|
│ │ ├── config/ # TOML configuration loading
|
||||||
│ │ └── notifications/ # Email notification system
|
│ │ └── communication/ # ZMQ message handling
|
||||||
├── dashboard/ # TUI dashboard application
|
├── dashboard/ # TUI dashboard application
|
||||||
│ ├── src/
|
│ ├── src/
|
||||||
│ │ ├── ui/widgets/ # CPU, memory, services, backup widgets
|
│ │ ├── ui/widgets/ # CPU, memory, services, backup, system
|
||||||
│ │ ├── metrics/ # Metric storage and filtering
|
│ │ ├── communication/ # ZMQ consumption and commands
|
||||||
│ │ └── communication/ # ZMQ metric consumption
|
│ │ └── app.rs # Main application loop
|
||||||
├── shared/ # Shared types and utilities
|
├── shared/ # Shared types and utilities
|
||||||
│ └── src/
|
│ └── src/
|
||||||
│ ├── metrics.rs # Metric, Status, and Value types
|
│ ├── metrics.rs # Metric, Status, StatusTracker types
|
||||||
│ ├── protocol.rs # ZMQ message format
|
│ ├── protocol.rs # ZMQ message format
|
||||||
│ └── cache.rs # Cache configuration
|
│ └── cache.rs # Cache configuration
|
||||||
└── README.md # This file
|
└── CLAUDE.md # Development guidelines and rules
|
||||||
```
|
```
|
||||||
|
|
||||||
### Building
|
### Testing
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Debug build
|
# Build and test
|
||||||
cargo build --workspace
|
nix-shell -p openssl pkg-config --run "cargo build --workspace"
|
||||||
|
nix-shell -p openssl pkg-config --run "cargo test --workspace"
|
||||||
|
|
||||||
# Release build
|
# Code quality
|
||||||
cargo build --workspace --release
|
cargo fmt --all
|
||||||
|
|
||||||
# Run tests
|
|
||||||
cargo test --workspace
|
|
||||||
|
|
||||||
# Check code formatting
|
|
||||||
cargo fmt --all -- --check
|
|
||||||
|
|
||||||
# Run clippy linter
|
|
||||||
cargo clippy --workspace -- -D warnings
|
cargo clippy --workspace -- -D warnings
|
||||||
```
|
```
|
||||||
|
|
||||||
### Dependencies
|
## Deployment
|
||||||
|
|
||||||
- **tokio** - Async runtime
|
### Automated Binary Releases
|
||||||
- **zmq** - Message passing between agent and dashboard
|
```bash
|
||||||
- **ratatui** - Terminal user interface
|
# Create new release
|
||||||
- **serde** - Serialization for metrics and config
|
cd ~/projects/cm-dashboard
|
||||||
- **anyhow/thiserror** - Error handling
|
git tag v0.1.X
|
||||||
- **tracing** - Structured logging
|
git push origin v0.1.X
|
||||||
- **lettre** - SMTP email notifications
|
```
|
||||||
- **clap** - Command-line argument parsing
|
|
||||||
- **toml** - Configuration file parsing
|
|
||||||
|
|
||||||
## NixOS Integration
|
This triggers automated:
|
||||||
|
- Static binary compilation with `RUSTFLAGS="-C target-feature=+crt-static"`
|
||||||
|
- GitHub-style release creation
|
||||||
|
- Tarball upload to Gitea
|
||||||
|
|
||||||
This project is designed for declarative deployment via NixOS:
|
### NixOS Integration
|
||||||
|
Update `~/projects/nixosbox/hosts/services/cm-dashboard.nix`:
|
||||||
### Configuration Generation
|
|
||||||
|
|
||||||
The NixOS module automatically generates the agent configuration:
|
|
||||||
|
|
||||||
```nix
|
```nix
|
||||||
# hosts/common/cm-dashboard.nix
|
version = "v0.1.43";
|
||||||
services.cm-dashboard-agent = {
|
src = pkgs.fetchurl {
|
||||||
enable = true;
|
url = "https://gitea.cmtec.se/cm/cm-dashboard/releases/download/${version}/cm-dashboard-linux-x86_64.tar.gz";
|
||||||
port = 6130;
|
sha256 = "sha256-HASH";
|
||||||
};
|
};
|
||||||
```
|
```
|
||||||
|
|
||||||
### Deployment
|
Get hash via:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Update NixOS configuration
|
cd ~/projects/nixosbox
|
||||||
git add hosts/common/cm-dashboard.nix
|
nix-build --no-out-link -E 'with import <nixpkgs> {}; fetchurl {
|
||||||
git commit -m "Update cm-dashboard configuration"
|
url = "URL_HERE";
|
||||||
git push
|
sha256 = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";
|
||||||
|
}' 2>&1 | grep "got:"
|
||||||
# Rebuild system (user-performed)
|
|
||||||
sudo nixos-rebuild switch --flake .
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Monitoring Intervals
|
## Monitoring Intervals
|
||||||
|
|
||||||
- **CPU/Memory**: 2 seconds (real-time monitoring)
|
- **Metrics Collection**: 2 seconds (CPU, memory, services)
|
||||||
- **Disk usage**: 300 seconds (5 minutes)
|
- **Metric Transmission**: 2 seconds (ZMQ publish)
|
||||||
- **Systemd services**: 10 seconds
|
- **Dashboard Updates**: 1 second (UI refresh)
|
||||||
- **SMART health**: 600 seconds (10 minutes)
|
- **Email Notifications**: 30 seconds (batched)
|
||||||
- **Backup status**: 60 seconds (1 minute)
|
- **Disk Monitoring**: 300 seconds (5 minutes)
|
||||||
- **Email notifications**: 30 seconds (batched)
|
- **Service Discovery**: 300 seconds (5 minutes cache)
|
||||||
- **Dashboard updates**: 1 second (real-time display)
|
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
MIT License - see LICENSE file for details
|
MIT License - see LICENSE file for details.
|
||||||
|
|
||||||
63
TODO.md
63
TODO.md
@ -1,63 +0,0 @@
|
|||||||
# TODO
|
|
||||||
|
|
||||||
## Systemd filtering (agent)
|
|
||||||
|
|
||||||
- remove user systemd collection
|
|
||||||
- reduce number of systemctl call
|
|
||||||
- Cahnge so only services in include list are detected
|
|
||||||
- Filter on exact name
|
|
||||||
- Add support for "\*" in filtering
|
|
||||||
|
|
||||||
## System panel (agent/dashboard)
|
|
||||||
|
|
||||||
use following layout:
|
|
||||||
'''
|
|
||||||
NixOS:
|
|
||||||
Build: xxxxxx
|
|
||||||
Agen: xxxxxx
|
|
||||||
CPU:
|
|
||||||
● Load: 0.02 0.31 0.86
|
|
||||||
└─ Freq: 3000MHz
|
|
||||||
RAM:
|
|
||||||
● Usage: 33% 2.6GB/7.6GB
|
|
||||||
└─ ● /tmp: 0% 0B/2.0GB
|
|
||||||
Storage:
|
|
||||||
● /:
|
|
||||||
├─ ● nvme0n1 T: 40C • W: 4%
|
|
||||||
└─ ● 8% 75.0GB/906.2GB
|
|
||||||
'''
|
|
||||||
|
|
||||||
- Add support to show login/active users
|
|
||||||
- Add support to show timestamp/version for latest nixos rebuild
|
|
||||||
|
|
||||||
## Backup panel (dashboard)
|
|
||||||
|
|
||||||
use following layout:
|
|
||||||
'''
|
|
||||||
Latest backup:
|
|
||||||
● <timestamp>
|
|
||||||
└─ Duration: 1.3m
|
|
||||||
Disk:
|
|
||||||
● Samsung SSD 870 QVO 1TB
|
|
||||||
├─ S/N: S5RRNF0W800639Y
|
|
||||||
└─ Usage: 50.5GB/915.8GB
|
|
||||||
Repos:
|
|
||||||
● gitea (4) 5.1GB
|
|
||||||
● immich (4) 45.0GB
|
|
||||||
● kryddorten (4) 67.8MB
|
|
||||||
● mariehall2 (4) 322.7MB
|
|
||||||
● nixosbox (4) 5.5MB
|
|
||||||
● unifi (4) 5.7MB
|
|
||||||
● vaultwarden (4) 508kB
|
|
||||||
'''
|
|
||||||
|
|
||||||
## Keyboard navigation and scrolling (dashboard)
|
|
||||||
|
|
||||||
- Add keyboard navigation between panels "Shift-Tab"
|
|
||||||
- Add lower statusbar with dynamic updated shortcuts when switchng between panels
|
|
||||||
|
|
||||||
## Remote execution (agent/dashboard)
|
|
||||||
|
|
||||||
- Add support for send command via dashboard to agent to do nixos rebuid
|
|
||||||
- Add support for navigating services in dashboard and trigger start/stop/restart
|
|
||||||
- Add support for trigger backup
|
|
||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard-agent"
|
name = "cm-dashboard-agent"
|
||||||
version = "0.1.0"
|
version = "0.1.275"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
@ -20,4 +20,5 @@ gethostname = { workspace = true }
|
|||||||
chrono-tz = "0.8"
|
chrono-tz = "0.8"
|
||||||
toml = { workspace = true }
|
toml = { workspace = true }
|
||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
reqwest = { version = "0.11", features = ["json", "blocking"] }
|
reqwest = { version = "0.11", features = ["json", "blocking"] }
|
||||||
|
futures = "0.3"
|
||||||
@ -1,23 +1,49 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use gethostname::gethostname;
|
use gethostname::gethostname;
|
||||||
use std::time::Duration;
|
use std::time::{Duration, Instant};
|
||||||
use tokio::time::interval;
|
use tokio::time::interval;
|
||||||
use tracing::{debug, error, info};
|
use tracing::{debug, error, info};
|
||||||
|
|
||||||
use crate::communication::{AgentCommand, ServiceAction, ZmqHandler};
|
use crate::communication::ZmqHandler;
|
||||||
use crate::config::AgentConfig;
|
use crate::config::AgentConfig;
|
||||||
use crate::metrics::MetricCollectionManager;
|
use crate::collectors::{
|
||||||
|
Collector,
|
||||||
|
backup::BackupCollector,
|
||||||
|
cpu::CpuCollector,
|
||||||
|
disk::DiskCollector,
|
||||||
|
memory::MemoryCollector,
|
||||||
|
network::NetworkCollector,
|
||||||
|
nixos::NixOSCollector,
|
||||||
|
systemd::SystemdCollector,
|
||||||
|
};
|
||||||
use crate::notifications::NotificationManager;
|
use crate::notifications::NotificationManager;
|
||||||
use crate::status::HostStatusManager;
|
use cm_dashboard_shared::AgentData;
|
||||||
use cm_dashboard_shared::{Metric, MetricMessage};
|
|
||||||
|
/// Wrapper for collectors with timing information
|
||||||
|
struct TimedCollector {
|
||||||
|
collector: Box<dyn Collector>,
|
||||||
|
interval: Duration,
|
||||||
|
last_collection: Option<Instant>,
|
||||||
|
name: String,
|
||||||
|
}
|
||||||
|
|
||||||
pub struct Agent {
|
pub struct Agent {
|
||||||
hostname: String,
|
hostname: String,
|
||||||
config: AgentConfig,
|
config: AgentConfig,
|
||||||
zmq_handler: ZmqHandler,
|
zmq_handler: ZmqHandler,
|
||||||
metric_manager: MetricCollectionManager,
|
collectors: Vec<TimedCollector>,
|
||||||
notification_manager: NotificationManager,
|
notification_manager: NotificationManager,
|
||||||
host_status_manager: HostStatusManager,
|
previous_status: Option<SystemStatus>,
|
||||||
|
cached_agent_data: AgentData,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Track system component status for change detection
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct SystemStatus {
|
||||||
|
cpu_load_status: cm_dashboard_shared::Status,
|
||||||
|
cpu_temperature_status: cm_dashboard_shared::Status,
|
||||||
|
memory_usage_status: cm_dashboard_shared::Status,
|
||||||
|
// Add more as needed
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Agent {
|
impl Agent {
|
||||||
@ -38,70 +64,130 @@ impl Agent {
|
|||||||
config.zmq.publisher_port
|
config.zmq.publisher_port
|
||||||
);
|
);
|
||||||
|
|
||||||
// Initialize metric collection manager with cache config
|
// Initialize collectors with timing information
|
||||||
let metric_manager = MetricCollectionManager::new(&config.collectors, &config).await?;
|
let mut collectors: Vec<TimedCollector> = Vec::new();
|
||||||
info!("Metric collection manager initialized");
|
|
||||||
|
// Add enabled collectors
|
||||||
|
if config.collectors.cpu.enabled {
|
||||||
|
collectors.push(TimedCollector {
|
||||||
|
collector: Box::new(CpuCollector::new(config.collectors.cpu.clone())),
|
||||||
|
interval: Duration::from_secs(config.collectors.cpu.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "CPU".to_string(),
|
||||||
|
});
|
||||||
|
info!("CPU collector initialized with {}s interval", config.collectors.cpu.interval_seconds);
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.collectors.memory.enabled {
|
||||||
|
collectors.push(TimedCollector {
|
||||||
|
collector: Box::new(MemoryCollector::new(config.collectors.memory.clone())),
|
||||||
|
interval: Duration::from_secs(config.collectors.memory.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Memory".to_string(),
|
||||||
|
});
|
||||||
|
info!("Memory collector initialized with {}s interval", config.collectors.memory.interval_seconds);
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.collectors.disk.enabled {
|
||||||
|
collectors.push(TimedCollector {
|
||||||
|
collector: Box::new(DiskCollector::new(config.collectors.disk.clone())),
|
||||||
|
interval: Duration::from_secs(config.collectors.disk.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Disk".to_string(),
|
||||||
|
});
|
||||||
|
info!("Disk collector initialized with {}s interval", config.collectors.disk.interval_seconds);
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.collectors.systemd.enabled {
|
||||||
|
collectors.push(TimedCollector {
|
||||||
|
collector: Box::new(SystemdCollector::new(config.collectors.systemd.clone())),
|
||||||
|
interval: Duration::from_secs(config.collectors.systemd.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Systemd".to_string(),
|
||||||
|
});
|
||||||
|
info!("Systemd collector initialized with {}s interval", config.collectors.systemd.interval_seconds);
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.collectors.backup.enabled {
|
||||||
|
collectors.push(TimedCollector {
|
||||||
|
collector: Box::new(BackupCollector::new()),
|
||||||
|
interval: Duration::from_secs(config.collectors.backup.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Backup".to_string(),
|
||||||
|
});
|
||||||
|
info!("Backup collector initialized with {}s interval", config.collectors.backup.interval_seconds);
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.collectors.network.enabled {
|
||||||
|
collectors.push(TimedCollector {
|
||||||
|
collector: Box::new(NetworkCollector::new(config.collectors.network.clone())),
|
||||||
|
interval: Duration::from_secs(config.collectors.network.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Network".to_string(),
|
||||||
|
});
|
||||||
|
info!("Network collector initialized with {}s interval", config.collectors.network.interval_seconds);
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.collectors.nixos.enabled {
|
||||||
|
collectors.push(TimedCollector {
|
||||||
|
collector: Box::new(NixOSCollector::new(config.collectors.nixos.clone())),
|
||||||
|
interval: Duration::from_secs(config.collectors.nixos.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "NixOS".to_string(),
|
||||||
|
});
|
||||||
|
info!("NixOS collector initialized with {}s interval", config.collectors.nixos.interval_seconds);
|
||||||
|
}
|
||||||
|
|
||||||
|
info!("Initialized {} collectors", collectors.len());
|
||||||
|
|
||||||
// Initialize notification manager
|
// Initialize notification manager
|
||||||
let notification_manager = NotificationManager::new(&config.notifications, &hostname)?;
|
let notification_manager = NotificationManager::new(&config.notifications, &hostname)?;
|
||||||
info!("Notification manager initialized");
|
info!("Notification manager initialized");
|
||||||
|
|
||||||
// Initialize host status manager
|
// Initialize cached agent data
|
||||||
let host_status_manager = HostStatusManager::new(config.status_aggregation.clone());
|
let cached_agent_data = AgentData::new(hostname.clone(), env!("CARGO_PKG_VERSION").to_string());
|
||||||
info!("Host status manager initialized");
|
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
hostname,
|
hostname,
|
||||||
config,
|
config,
|
||||||
zmq_handler,
|
zmq_handler,
|
||||||
metric_manager,
|
collectors,
|
||||||
notification_manager,
|
notification_manager,
|
||||||
host_status_manager,
|
previous_status: None,
|
||||||
|
cached_agent_data,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Main agent loop with structured data collection
|
||||||
pub async fn run(&mut self, mut shutdown_rx: tokio::sync::oneshot::Receiver<()>) -> Result<()> {
|
pub async fn run(&mut self, mut shutdown_rx: tokio::sync::oneshot::Receiver<()>) -> Result<()> {
|
||||||
info!("Starting agent main loop with separated collection and transmission");
|
info!("Starting agent main loop");
|
||||||
|
|
||||||
// CRITICAL: Collect ALL data immediately at startup before entering the loop
|
// Initial collection
|
||||||
info!("Performing initial FORCE collection of all metrics at startup");
|
if let Err(e) = self.collect_and_broadcast().await {
|
||||||
if let Err(e) = self.collect_all_metrics_force().await {
|
error!("Initial metric collection failed: {}", e);
|
||||||
error!("Failed to collect initial metrics: {}", e);
|
|
||||||
} else {
|
|
||||||
info!("Initial metric collection completed - all data cached and ready");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Separate intervals for collection and transmission
|
// Set up intervals
|
||||||
let mut collection_interval =
|
let mut transmission_interval = interval(Duration::from_secs(
|
||||||
interval(Duration::from_secs(self.config.collection_interval_seconds));
|
self.config.zmq.transmission_interval_seconds,
|
||||||
let mut transmission_interval = interval(Duration::from_secs(1)); // ZMQ broadcast every 1 second
|
));
|
||||||
let mut notification_interval = interval(Duration::from_secs(self.config.status_aggregation.notification_interval_seconds));
|
let mut notification_interval = interval(Duration::from_secs(30)); // Check notifications every 30s
|
||||||
|
|
||||||
|
// Skip initial ticks to avoid immediate execution
|
||||||
|
transmission_interval.tick().await;
|
||||||
|
notification_interval.tick().await;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
_ = collection_interval.tick() => {
|
|
||||||
// Only collect and cache metrics, no ZMQ transmission
|
|
||||||
if let Err(e) = self.collect_metrics_only().await {
|
|
||||||
error!("Failed to collect metrics: {}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ = transmission_interval.tick() => {
|
_ = transmission_interval.tick() => {
|
||||||
// Send all metrics via ZMQ every 1 second
|
if let Err(e) = self.collect_and_broadcast().await {
|
||||||
if let Err(e) = self.broadcast_all_metrics().await {
|
error!("Failed to collect and broadcast metrics: {}", e);
|
||||||
error!("Failed to broadcast metrics: {}", e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ = notification_interval.tick() => {
|
_ = notification_interval.tick() => {
|
||||||
// Process batched notifications
|
// Process any pending notifications
|
||||||
if let Err(e) = self.host_status_manager.process_pending_notifications(&mut self.notification_manager).await {
|
// NOTE: With structured data, we might need to implement status tracking differently
|
||||||
error!("Failed to process pending notifications: {}", e);
|
// For now, we skip this until status evaluation is migrated
|
||||||
}
|
|
||||||
}
|
|
||||||
// Handle incoming commands (check periodically)
|
|
||||||
_ = tokio::time::sleep(Duration::from_millis(100)) => {
|
|
||||||
if let Err(e) = self.handle_commands().await {
|
|
||||||
error!("Error handling commands: {}", e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
_ = &mut shutdown_rx => {
|
_ = &mut shutdown_rx => {
|
||||||
info!("Shutdown signal received, stopping agent loop");
|
info!("Shutdown signal received, stopping agent loop");
|
||||||
@ -114,303 +200,135 @@ impl Agent {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn collect_all_metrics_force(&mut self) -> Result<()> {
|
/// Collect structured data from all collectors and broadcast via ZMQ
|
||||||
info!("Starting FORCE metric collection for startup");
|
async fn collect_and_broadcast(&mut self) -> Result<()> {
|
||||||
|
debug!("Starting structured data collection");
|
||||||
|
|
||||||
// Force collect all metrics from all collectors immediately
|
// Collect data from collectors whose intervals have elapsed
|
||||||
let metrics = self.metric_manager.collect_all_metrics_force().await?;
|
// Update cached_agent_data with new data
|
||||||
|
let now = Instant::now();
|
||||||
|
for timed_collector in &mut self.collectors {
|
||||||
|
let should_collect = match timed_collector.last_collection {
|
||||||
|
None => true, // First collection
|
||||||
|
Some(last_time) => now.duration_since(last_time) >= timed_collector.interval,
|
||||||
|
};
|
||||||
|
|
||||||
if metrics.is_empty() {
|
if should_collect {
|
||||||
error!("No metrics collected during force collection!");
|
if let Err(e) = timed_collector.collector.collect_structured(&mut self.cached_agent_data).await {
|
||||||
return Ok(());
|
error!("Collector {} failed: {}", timed_collector.name, e);
|
||||||
}
|
// Update last_collection time even on failure to prevent immediate retries
|
||||||
|
timed_collector.last_collection = Some(now);
|
||||||
info!("Force collected and cached {} metrics", metrics.len());
|
} else {
|
||||||
|
timed_collector.last_collection = Some(now);
|
||||||
// Process metrics through status manager
|
debug!(
|
||||||
self.process_metrics(&metrics).await;
|
"Collected from {} ({}s interval)",
|
||||||
|
timed_collector.name,
|
||||||
Ok(())
|
timed_collector.interval.as_secs()
|
||||||
}
|
);
|
||||||
|
|
||||||
async fn collect_metrics_only(&mut self) -> Result<()> {
|
|
||||||
debug!("Starting metric collection cycle (cache only)");
|
|
||||||
|
|
||||||
// Collect all metrics from all collectors and cache them
|
|
||||||
let metrics = self.metric_manager.collect_all_metrics().await?;
|
|
||||||
|
|
||||||
if metrics.is_empty() {
|
|
||||||
debug!("No metrics collected this cycle");
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
debug!("Collected and cached {} metrics", metrics.len());
|
|
||||||
|
|
||||||
// Process metrics through status manager
|
|
||||||
self.process_metrics(&metrics).await;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn broadcast_all_metrics(&mut self) -> Result<()> {
|
|
||||||
debug!("Broadcasting all metrics via ZMQ");
|
|
||||||
|
|
||||||
// Get all current metrics from collectors
|
|
||||||
let mut metrics = self.metric_manager.collect_all_metrics().await?;
|
|
||||||
|
|
||||||
// Add the host status summary metric from status manager
|
|
||||||
let host_status_metric = self.host_status_manager.get_host_status_metric();
|
|
||||||
metrics.push(host_status_metric);
|
|
||||||
|
|
||||||
if metrics.is_empty() {
|
|
||||||
debug!("No metrics to broadcast");
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
debug!("Broadcasting {} metrics (including host status summary)", metrics.len());
|
|
||||||
|
|
||||||
// Create and send message with all current data
|
|
||||||
let message = MetricMessage::new(self.hostname.clone(), metrics);
|
|
||||||
self.zmq_handler.publish_metrics(&message).await?;
|
|
||||||
|
|
||||||
debug!("Metrics broadcasted successfully");
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn process_metrics(&mut self, metrics: &[Metric]) {
|
|
||||||
for metric in metrics {
|
|
||||||
self.host_status_manager.process_metric(metric, &mut self.notification_manager).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn handle_commands(&mut self) -> Result<()> {
|
|
||||||
// Try to receive commands (non-blocking)
|
|
||||||
match self.zmq_handler.try_receive_command() {
|
|
||||||
Ok(Some(command)) => {
|
|
||||||
info!("Received command: {:?}", command);
|
|
||||||
self.process_command(command).await?;
|
|
||||||
}
|
|
||||||
Ok(None) => {
|
|
||||||
// No command available - this is normal
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
error!("Error receiving command: {}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn process_command(&mut self, command: AgentCommand) -> Result<()> {
|
|
||||||
match command {
|
|
||||||
AgentCommand::CollectNow => {
|
|
||||||
info!("Processing CollectNow command");
|
|
||||||
if let Err(e) = self.collect_metrics_only().await {
|
|
||||||
error!("Failed to collect metrics on command: {}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
AgentCommand::SetInterval { seconds } => {
|
|
||||||
info!("Processing SetInterval command: {} seconds", seconds);
|
|
||||||
// Note: This would require modifying the interval, which is complex
|
|
||||||
// For now, just log the request
|
|
||||||
info!("Interval change requested but not implemented yet");
|
|
||||||
}
|
|
||||||
AgentCommand::ToggleCollector { name, enabled } => {
|
|
||||||
info!(
|
|
||||||
"Processing ToggleCollector command: {} -> {}",
|
|
||||||
name, enabled
|
|
||||||
);
|
|
||||||
// Note: This would require dynamic collector management
|
|
||||||
info!("Collector toggle requested but not implemented yet");
|
|
||||||
}
|
|
||||||
AgentCommand::Ping => {
|
|
||||||
info!("Processing Ping command - agent is alive");
|
|
||||||
// Could send a response back via ZMQ if needed
|
|
||||||
}
|
|
||||||
AgentCommand::ServiceControl { service_name, action } => {
|
|
||||||
info!("Processing ServiceControl command: {} {:?}", service_name, action);
|
|
||||||
if let Err(e) = self.handle_service_control(&service_name, &action).await {
|
|
||||||
error!("Failed to execute service control: {}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
AgentCommand::SystemRebuild { git_url, git_branch, working_dir, api_key_file } => {
|
|
||||||
info!("Processing SystemRebuild command: {} @ {} -> {}", git_url, git_branch, working_dir);
|
|
||||||
if let Err(e) = self.handle_system_rebuild(&git_url, &git_branch, &working_dir, api_key_file.as_deref()).await {
|
|
||||||
error!("Failed to execute system rebuild: {}", e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update timestamp on cached data
|
||||||
|
self.cached_agent_data.timestamp = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_secs();
|
||||||
|
|
||||||
|
// Clone for notification check (to avoid borrow issues)
|
||||||
|
let agent_data_snapshot = self.cached_agent_data.clone();
|
||||||
|
|
||||||
|
// Check for status changes and send notifications
|
||||||
|
if let Err(e) = self.check_status_changes_and_notify(&agent_data_snapshot).await {
|
||||||
|
error!("Failed to check status changes: {}", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Broadcast the cached structured data via ZMQ
|
||||||
|
if let Err(e) = self.zmq_handler.publish_agent_data(&agent_data_snapshot).await {
|
||||||
|
error!("Failed to broadcast agent data: {}", e);
|
||||||
|
} else {
|
||||||
|
debug!("Successfully broadcast structured agent data");
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle systemd service control commands
|
/// Check for status changes and send notifications
|
||||||
async fn handle_service_control(&self, service_name: &str, action: &ServiceAction) -> Result<()> {
|
async fn check_status_changes_and_notify(&mut self, agent_data: &AgentData) -> Result<()> {
|
||||||
let action_str = match action {
|
// Extract current status
|
||||||
ServiceAction::Start => "start",
|
let current_status = SystemStatus {
|
||||||
ServiceAction::Stop => "stop",
|
cpu_load_status: agent_data.system.cpu.load_status.clone(),
|
||||||
ServiceAction::Restart => "restart",
|
cpu_temperature_status: agent_data.system.cpu.temperature_status.clone(),
|
||||||
ServiceAction::Status => "status",
|
memory_usage_status: agent_data.system.memory.usage_status.clone(),
|
||||||
};
|
};
|
||||||
|
|
||||||
info!("Executing systemctl {} {}", action_str, service_name);
|
// Check for status changes
|
||||||
|
if let Some(previous) = self.previous_status.clone() {
|
||||||
|
self.check_and_notify_status_change(
|
||||||
|
"CPU Load",
|
||||||
|
&previous.cpu_load_status,
|
||||||
|
¤t_status.cpu_load_status,
|
||||||
|
format!("CPU load: {:.1}", agent_data.system.cpu.load_1min)
|
||||||
|
).await?;
|
||||||
|
|
||||||
let output = tokio::process::Command::new("sudo")
|
self.check_and_notify_status_change(
|
||||||
.arg("systemctl")
|
"CPU Temperature",
|
||||||
.arg(action_str)
|
&previous.cpu_temperature_status,
|
||||||
.arg(service_name)
|
¤t_status.cpu_temperature_status,
|
||||||
.output()
|
format!("CPU temperature: {}°C",
|
||||||
.await?;
|
agent_data.system.cpu.temperature_celsius.unwrap_or(0.0) as i32)
|
||||||
|
).await?;
|
||||||
|
|
||||||
if output.status.success() {
|
self.check_and_notify_status_change(
|
||||||
info!("Service {} {} completed successfully", service_name, action_str);
|
"Memory Usage",
|
||||||
if !output.stdout.is_empty() {
|
&previous.memory_usage_status,
|
||||||
debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
|
¤t_status.memory_usage_status,
|
||||||
}
|
format!("Memory usage: {:.1}%", agent_data.system.memory.usage_percent)
|
||||||
} else {
|
).await?;
|
||||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
||||||
error!("Service {} {} failed: {}", service_name, action_str, stderr);
|
|
||||||
return Err(anyhow::anyhow!("systemctl {} {} failed: {}", action_str, service_name, stderr));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Force refresh metrics after service control to update service status
|
|
||||||
if matches!(action, ServiceAction::Start | ServiceAction::Stop | ServiceAction::Restart) {
|
|
||||||
info!("Triggering metric refresh after service control");
|
|
||||||
// Note: We can't call self.collect_metrics_only() here due to borrowing issues
|
|
||||||
// The next metric collection cycle will pick up the changes
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Store current status for next comparison
|
||||||
|
self.previous_status = Some(current_status);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle NixOS system rebuild commands with git clone approach
|
/// Check individual status change and send notification if degraded
|
||||||
async fn handle_system_rebuild(&self, git_url: &str, git_branch: &str, working_dir: &str, api_key_file: Option<&str>) -> Result<()> {
|
async fn check_and_notify_status_change(
|
||||||
info!("Starting NixOS system rebuild: {} @ {} -> {}", git_url, git_branch, working_dir);
|
&mut self,
|
||||||
|
component: &str,
|
||||||
|
previous: &cm_dashboard_shared::Status,
|
||||||
|
current: &cm_dashboard_shared::Status,
|
||||||
|
details: String
|
||||||
|
) -> Result<()> {
|
||||||
|
use cm_dashboard_shared::Status;
|
||||||
|
|
||||||
// Enable maintenance mode before rebuild
|
// Only notify on status degradation (OK → Warning/Critical, Warning → Critical)
|
||||||
let maintenance_file = "/tmp/cm-maintenance";
|
let should_notify = match (previous, current) {
|
||||||
if let Err(e) = tokio::fs::File::create(maintenance_file).await {
|
(Status::Ok, Status::Warning) => true,
|
||||||
error!("Failed to create maintenance mode file: {}", e);
|
(Status::Ok, Status::Critical) => true,
|
||||||
} else {
|
(Status::Warning, Status::Critical) => true,
|
||||||
info!("Maintenance mode enabled");
|
_ => false,
|
||||||
}
|
};
|
||||||
|
|
||||||
// Clone or update repository
|
if should_notify {
|
||||||
let git_result = self.ensure_git_repository(git_url, git_branch, working_dir, api_key_file).await;
|
let subject = format!("{} {} Alert", self.hostname, component);
|
||||||
|
let body = format!(
|
||||||
// Execute nixos-rebuild if git operation succeeded - run detached but log output
|
"Alert: {} status changed from {:?} to {:?}\n\nDetails: {}\n\nTime: {}",
|
||||||
let rebuild_result = if git_result.is_ok() {
|
component,
|
||||||
info!("Git repository ready, executing nixos-rebuild in detached mode");
|
previous,
|
||||||
let log_file = std::fs::OpenOptions::new()
|
current,
|
||||||
.create(true)
|
details,
|
||||||
.append(true)
|
chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
|
||||||
.open("/var/log/cm-dashboard/nixos-rebuild.log")
|
);
|
||||||
.map_err(|e| anyhow::anyhow!("Failed to open rebuild log: {}", e))?;
|
|
||||||
|
info!("Sending notification: {} - {:?} → {:?}", component, previous, current);
|
||||||
|
|
||||||
tokio::process::Command::new("nohup")
|
if let Err(e) = self.notification_manager.send_direct_email(&subject, &body).await {
|
||||||
.arg("sudo")
|
error!("Failed to send notification for {}: {}", component, e);
|
||||||
.arg("/run/current-system/sw/bin/nixos-rebuild")
|
|
||||||
.arg("switch")
|
|
||||||
.arg("--option")
|
|
||||||
.arg("sandbox")
|
|
||||||
.arg("false")
|
|
||||||
.arg("--flake")
|
|
||||||
.arg(".")
|
|
||||||
.current_dir(working_dir)
|
|
||||||
.stdin(std::process::Stdio::null())
|
|
||||||
.stdout(std::process::Stdio::from(log_file.try_clone().unwrap()))
|
|
||||||
.stderr(std::process::Stdio::from(log_file))
|
|
||||||
.spawn()
|
|
||||||
} else {
|
|
||||||
return git_result.and_then(|_| unreachable!());
|
|
||||||
};
|
|
||||||
|
|
||||||
// Always try to remove maintenance mode file
|
|
||||||
if let Err(e) = tokio::fs::remove_file(maintenance_file).await {
|
|
||||||
if e.kind() != std::io::ErrorKind::NotFound {
|
|
||||||
error!("Failed to remove maintenance mode file: {}", e);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
info!("Maintenance mode disabled");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check rebuild start result
|
|
||||||
match rebuild_result {
|
|
||||||
Ok(_child) => {
|
|
||||||
info!("NixOS rebuild started successfully in background");
|
|
||||||
// Don't wait for completion to avoid agent being killed during rebuild
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
error!("Failed to start nixos-rebuild: {}", e);
|
|
||||||
return Err(anyhow::anyhow!("Failed to start nixos-rebuild: {}", e));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
info!("System rebuild completed, triggering metric refresh");
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Ensure git repository is cloned and up to date with force clone approach
|
}
|
||||||
async fn ensure_git_repository(&self, git_url: &str, git_branch: &str, working_dir: &str, api_key_file: Option<&str>) -> Result<()> {
|
|
||||||
use std::path::Path;
|
|
||||||
|
|
||||||
// Read API key if provided
|
|
||||||
let auth_url = if let Some(key_file) = api_key_file {
|
|
||||||
match tokio::fs::read_to_string(key_file).await {
|
|
||||||
Ok(api_key) => {
|
|
||||||
let api_key = api_key.trim();
|
|
||||||
if !api_key.is_empty() {
|
|
||||||
// Convert https://gitea.cmtec.se/cm/nixosbox.git to https://token@gitea.cmtec.se/cm/nixosbox.git
|
|
||||||
if git_url.starts_with("https://") {
|
|
||||||
let url_without_protocol = &git_url[8..]; // Remove "https://"
|
|
||||||
format!("https://{}@{}", api_key, url_without_protocol)
|
|
||||||
} else {
|
|
||||||
info!("API key provided but URL is not HTTPS, using original URL");
|
|
||||||
git_url.to_string()
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
info!("API key file is empty, using original URL");
|
|
||||||
git_url.to_string()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
info!("Could not read API key file {}: {}, using original URL", key_file, e);
|
|
||||||
git_url.to_string()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
git_url.to_string()
|
|
||||||
};
|
|
||||||
|
|
||||||
// Always remove existing directory and do fresh clone for consistent state
|
|
||||||
let working_path = Path::new(working_dir);
|
|
||||||
if working_path.exists() {
|
|
||||||
info!("Removing existing repository directory: {}", working_dir);
|
|
||||||
if let Err(e) = tokio::fs::remove_dir_all(working_path).await {
|
|
||||||
error!("Failed to remove existing directory: {}", e);
|
|
||||||
return Err(anyhow::anyhow!("Failed to remove existing directory: {}", e));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
info!("Force cloning git repository from {} (branch: {})", git_url, git_branch);
|
|
||||||
|
|
||||||
// Force clone with depth 1 for efficiency (no history needed for deployment)
|
|
||||||
let output = tokio::process::Command::new("git")
|
|
||||||
.arg("clone")
|
|
||||||
.arg("--depth")
|
|
||||||
.arg("1")
|
|
||||||
.arg("--branch")
|
|
||||||
.arg(git_branch)
|
|
||||||
.arg(&auth_url)
|
|
||||||
.arg(working_dir)
|
|
||||||
.output()
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
||||||
error!("Git clone failed: {}", stderr);
|
|
||||||
return Err(anyhow::anyhow!("Git clone failed: {}", stderr));
|
|
||||||
}
|
|
||||||
|
|
||||||
info!("Git repository cloned successfully with latest state");
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -1,404 +1,153 @@
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use chrono::Utc;
|
use cm_dashboard_shared::{AgentData, BackupData, BackupRepositoryData, Status};
|
||||||
use cm_dashboard_shared::{Metric, MetricValue, Status, StatusTracker};
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use tokio::fs;
|
use std::fs;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
use super::{Collector, CollectorError};
|
use super::{Collector, CollectorError};
|
||||||
use tracing::error;
|
|
||||||
|
|
||||||
/// Backup collector that reads TOML status files for borgbackup metrics
|
/// Backup collector that reads backup status from TOML files with structured data output
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct BackupCollector {
|
pub struct BackupCollector {
|
||||||
pub backup_status_file: String,
|
/// Directory containing backup status files
|
||||||
pub max_age_hours: u64,
|
status_dir: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BackupCollector {
|
impl BackupCollector {
|
||||||
pub fn new(backup_status_file: Option<String>, max_age_hours: u64) -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
backup_status_file: backup_status_file
|
status_dir: "/var/lib/backup/status".to_string(),
|
||||||
.unwrap_or_else(|| "/var/lib/backup/backup-status.toml".to_string()),
|
|
||||||
max_age_hours,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn read_backup_status(&self) -> Result<Option<BackupStatusToml>, CollectorError> {
|
/// Scan directory for backup status file (nfs-backup.toml)
|
||||||
// Check if backup status file exists
|
async fn scan_status_files(&self) -> Result<Vec<PathBuf>, CollectorError> {
|
||||||
if !std::path::Path::new(&self.backup_status_file).exists() {
|
let status_path = Path::new(&self.status_dir);
|
||||||
return Ok(None); // File doesn't exist, but this is not an error
|
|
||||||
|
if !status_path.exists() {
|
||||||
|
debug!("Backup status directory not found: {}", self.status_dir);
|
||||||
|
return Ok(Vec::new());
|
||||||
}
|
}
|
||||||
|
|
||||||
let content = fs::read_to_string(&self.backup_status_file)
|
// Look for nfs-backup.toml (new NFS-based backup)
|
||||||
.await
|
let nfs_backup_file = status_path.join("nfs-backup.toml");
|
||||||
|
if nfs_backup_file.exists() {
|
||||||
|
return Ok(vec![nfs_backup_file]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// No backup status file found
|
||||||
|
debug!("No nfs-backup.toml found in {}", self.status_dir);
|
||||||
|
Ok(Vec::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read a single backup status file
|
||||||
|
async fn read_status_file(&self, path: &Path) -> Result<BackupStatusToml, CollectorError> {
|
||||||
|
let content = fs::read_to_string(path)
|
||||||
.map_err(|e| CollectorError::SystemRead {
|
.map_err(|e| CollectorError::SystemRead {
|
||||||
path: self.backup_status_file.clone(),
|
path: path.to_string_lossy().to_string(),
|
||||||
error: e.to_string(),
|
error: e.to_string(),
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let backup_status = toml::from_str(&content).map_err(|e| CollectorError::Parse {
|
let status: BackupStatusToml = toml::from_str(&content)
|
||||||
value: "backup status TOML".to_string(),
|
.map_err(|e| CollectorError::Parse {
|
||||||
error: e.to_string(),
|
value: content.clone(),
|
||||||
})?;
|
error: format!("Failed to parse backup status TOML: {}", e),
|
||||||
|
})?;
|
||||||
|
|
||||||
Ok(Some(backup_status))
|
Ok(status)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn calculate_backup_status(&self, backup_status: &BackupStatusToml) -> Status {
|
/// Calculate backup status from TOML status field
|
||||||
// Parse the start time to check age - handle both RFC3339 and local timestamp formats
|
fn calculate_backup_status(status_str: &str) -> Status {
|
||||||
let start_time = match chrono::DateTime::parse_from_rfc3339(&backup_status.start_time) {
|
match status_str.to_lowercase().as_str() {
|
||||||
Ok(dt) => dt.with_timezone(&Utc),
|
"success" | "completed" => Status::Ok,
|
||||||
Err(_) => {
|
"warning" => Status::Warning,
|
||||||
// Try parsing as naive datetime and assume UTC
|
"failed" | "error" => Status::Critical,
|
||||||
match chrono::NaiveDateTime::parse_from_str(
|
_ => Status::Unknown,
|
||||||
&backup_status.start_time,
|
}
|
||||||
"%Y-%m-%dT%H:%M:%S%.f",
|
}
|
||||||
) {
|
|
||||||
Ok(naive_dt) => naive_dt.and_utc(),
|
/// Convert BackupStatusToml to BackupData and populate AgentData
|
||||||
Err(_) => {
|
async fn populate_backup_data(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
error!(
|
let status_files = self.scan_status_files().await?;
|
||||||
"Failed to parse backup timestamp: {}",
|
|
||||||
backup_status.start_time
|
if status_files.is_empty() {
|
||||||
);
|
debug!("No backup status files found");
|
||||||
return Status::Unknown;
|
agent_data.backup = BackupData {
|
||||||
|
last_backup_time: None,
|
||||||
|
backup_status: Status::Unknown,
|
||||||
|
repositories: Vec::new(),
|
||||||
|
};
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Aggregate repository data across all backup status files
|
||||||
|
let mut repo_map: HashMap<String, BackupRepositoryData> = HashMap::new();
|
||||||
|
let mut worst_status = Status::Ok;
|
||||||
|
let mut latest_backup_time: Option<String> = None;
|
||||||
|
|
||||||
|
for status_file in status_files {
|
||||||
|
match self.read_status_file(&status_file).await {
|
||||||
|
Ok(backup_status) => {
|
||||||
|
// Calculate backup status
|
||||||
|
let backup_status_enum = Self::calculate_backup_status(&backup_status.status);
|
||||||
|
worst_status = worst_status.max(backup_status_enum);
|
||||||
|
|
||||||
|
// Track latest backup time
|
||||||
|
if latest_backup_time.is_none() || Some(&backup_status.start_time) > latest_backup_time.as_ref() {
|
||||||
|
latest_backup_time = Some(backup_status.start_time.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process each service in this backup
|
||||||
|
for (service_name, service_status) in backup_status.services {
|
||||||
|
// Convert bytes to GB
|
||||||
|
let repo_size_gb = service_status.repo_size_bytes as f32 / 1_073_741_824.0;
|
||||||
|
|
||||||
|
// Calculate service status
|
||||||
|
let service_status_enum = Self::calculate_backup_status(&service_status.status);
|
||||||
|
worst_status = worst_status.max(service_status_enum);
|
||||||
|
|
||||||
|
// Update or insert repository data
|
||||||
|
repo_map.insert(service_name.clone(), BackupRepositoryData {
|
||||||
|
name: service_name,
|
||||||
|
archive_count: service_status.archive_count,
|
||||||
|
repo_size_gb,
|
||||||
|
status: service_status_enum,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!("Failed to read backup status file {:?}: {}", status_file, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert HashMap to sorted Vec
|
||||||
|
let mut repositories: Vec<BackupRepositoryData> = repo_map.into_values().collect();
|
||||||
|
repositories.sort_by(|a, b| a.name.cmp(&b.name));
|
||||||
|
|
||||||
|
agent_data.backup = BackupData {
|
||||||
|
last_backup_time: latest_backup_time,
|
||||||
|
backup_status: worst_status,
|
||||||
|
repositories,
|
||||||
};
|
};
|
||||||
|
|
||||||
let hours_since_backup = Utc::now().signed_duration_since(start_time).num_hours();
|
Ok(())
|
||||||
|
|
||||||
// Check overall backup status
|
|
||||||
match backup_status.status.as_str() {
|
|
||||||
"success" => {
|
|
||||||
if hours_since_backup > self.max_age_hours as i64 {
|
|
||||||
Status::Warning // Backup too old
|
|
||||||
} else {
|
|
||||||
Status::Ok
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"failed" => Status::Critical,
|
|
||||||
"running" => Status::Ok, // Currently running is OK
|
|
||||||
_ => Status::Unknown,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn calculate_service_status(&self, service: &ServiceStatus) -> Status {
|
|
||||||
match service.status.as_str() {
|
|
||||||
"completed" => {
|
|
||||||
if service.exit_code == 0 {
|
|
||||||
Status::Ok
|
|
||||||
} else {
|
|
||||||
Status::Critical
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"failed" => Status::Critical,
|
|
||||||
"disabled" => Status::Warning, // Service intentionally disabled
|
|
||||||
"running" => Status::Ok,
|
|
||||||
_ => Status::Unknown,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn bytes_to_gb(bytes: u64) -> f32 {
|
|
||||||
bytes as f32 / (1024.0 * 1024.0 * 1024.0)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl Collector for BackupCollector {
|
impl Collector for BackupCollector {
|
||||||
|
async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
async fn collect(&self, _status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
|
debug!("Collecting backup status");
|
||||||
let backup_status_option = self.read_backup_status().await?;
|
self.populate_backup_data(agent_data).await
|
||||||
let mut metrics = Vec::new();
|
|
||||||
let timestamp = chrono::Utc::now().timestamp() as u64;
|
|
||||||
|
|
||||||
// If no backup status file exists, return minimal metrics indicating no backup system
|
|
||||||
let backup_status = match backup_status_option {
|
|
||||||
Some(status) => status,
|
|
||||||
None => {
|
|
||||||
// No backup system configured - return minimal "unknown" metrics
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_overall_status".to_string(),
|
|
||||||
value: MetricValue::String("no_backup_system".to_string()),
|
|
||||||
status: Status::Unknown,
|
|
||||||
timestamp,
|
|
||||||
description: Some("No backup system configured (no status file found)".to_string()),
|
|
||||||
unit: None,
|
|
||||||
});
|
|
||||||
return Ok(metrics);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Overall backup status
|
|
||||||
let overall_status = self.calculate_backup_status(&backup_status);
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_overall_status".to_string(),
|
|
||||||
value: MetricValue::String(match overall_status {
|
|
||||||
Status::Ok => "ok".to_string(),
|
|
||||||
Status::Pending => "pending".to_string(),
|
|
||||||
Status::Warning => "warning".to_string(),
|
|
||||||
Status::Critical => "critical".to_string(),
|
|
||||||
Status::Unknown => "unknown".to_string(),
|
|
||||||
}),
|
|
||||||
status: overall_status,
|
|
||||||
timestamp,
|
|
||||||
description: Some(format!(
|
|
||||||
"Backup: {} at {}",
|
|
||||||
backup_status.status, backup_status.start_time
|
|
||||||
)),
|
|
||||||
unit: None,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Backup duration
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_duration_seconds".to_string(),
|
|
||||||
value: MetricValue::Integer(backup_status.duration_seconds),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some("Duration of last backup run".to_string()),
|
|
||||||
unit: Some("seconds".to_string()),
|
|
||||||
});
|
|
||||||
|
|
||||||
// Last backup timestamp - use last_updated (when backup finished) instead of start_time
|
|
||||||
let last_updated_dt_result =
|
|
||||||
chrono::DateTime::parse_from_rfc3339(&backup_status.last_updated)
|
|
||||||
.map(|dt| dt.with_timezone(&Utc))
|
|
||||||
.or_else(|_| {
|
|
||||||
// Try parsing as naive datetime and assume UTC
|
|
||||||
chrono::NaiveDateTime::parse_from_str(
|
|
||||||
&backup_status.last_updated,
|
|
||||||
"%Y-%m-%dT%H:%M:%S%.f",
|
|
||||||
)
|
|
||||||
.map(|naive_dt| naive_dt.and_utc())
|
|
||||||
});
|
|
||||||
|
|
||||||
if let Ok(last_updated_dt) = last_updated_dt_result {
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_last_run_timestamp".to_string(),
|
|
||||||
value: MetricValue::Integer(last_updated_dt.timestamp()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some("Timestamp of last backup completion".to_string()),
|
|
||||||
unit: Some("unix_timestamp".to_string()),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
error!(
|
|
||||||
"Failed to parse backup timestamp for last_run_timestamp: {}",
|
|
||||||
backup_status.last_updated
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Individual service metrics
|
|
||||||
for (service_name, service) in &backup_status.services {
|
|
||||||
let service_status = self.calculate_service_status(service);
|
|
||||||
|
|
||||||
// Service status
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("backup_service_{}_status", service_name),
|
|
||||||
value: MetricValue::String(match service_status {
|
|
||||||
Status::Ok => "ok".to_string(),
|
|
||||||
Status::Pending => "pending".to_string(),
|
|
||||||
Status::Warning => "warning".to_string(),
|
|
||||||
Status::Critical => "critical".to_string(),
|
|
||||||
Status::Unknown => "unknown".to_string(),
|
|
||||||
}),
|
|
||||||
status: service_status,
|
|
||||||
timestamp,
|
|
||||||
description: Some(format!(
|
|
||||||
"Backup service {} status: {}",
|
|
||||||
service_name, service.status
|
|
||||||
)),
|
|
||||||
unit: None,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Service exit code
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("backup_service_{}_exit_code", service_name),
|
|
||||||
value: MetricValue::Integer(service.exit_code),
|
|
||||||
status: if service.exit_code == 0 {
|
|
||||||
Status::Ok
|
|
||||||
} else {
|
|
||||||
Status::Critical
|
|
||||||
},
|
|
||||||
timestamp,
|
|
||||||
description: Some(format!("Exit code for backup service {}", service_name)),
|
|
||||||
unit: None,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Repository archive count
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("backup_service_{}_archive_count", service_name),
|
|
||||||
value: MetricValue::Integer(service.archive_count),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some(format!("Number of archives in {} repository", service_name)),
|
|
||||||
unit: Some("archives".to_string()),
|
|
||||||
});
|
|
||||||
|
|
||||||
// Repository size in GB
|
|
||||||
let repo_size_gb = Self::bytes_to_gb(service.repo_size_bytes);
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("backup_service_{}_repo_size_gb", service_name),
|
|
||||||
value: MetricValue::Float(repo_size_gb),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some(format!("Repository size for {} in GB", service_name)),
|
|
||||||
unit: Some("GB".to_string()),
|
|
||||||
});
|
|
||||||
|
|
||||||
// Repository path for reference
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("backup_service_{}_repo_path", service_name),
|
|
||||||
value: MetricValue::String(service.repo_path.clone()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some(format!("Repository path for {}", service_name)),
|
|
||||||
unit: None,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Total number of services
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_total_services".to_string(),
|
|
||||||
value: MetricValue::Integer(backup_status.services.len() as i64),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some("Total number of backup services".to_string()),
|
|
||||||
unit: Some("services".to_string()),
|
|
||||||
});
|
|
||||||
|
|
||||||
// Calculate total repository size
|
|
||||||
let total_size_bytes: u64 = backup_status
|
|
||||||
.services
|
|
||||||
.values()
|
|
||||||
.map(|s| s.repo_size_bytes)
|
|
||||||
.sum();
|
|
||||||
let total_size_gb = Self::bytes_to_gb(total_size_bytes);
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_total_repo_size_gb".to_string(),
|
|
||||||
value: MetricValue::Float(total_size_gb),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some("Total size of all backup repositories".to_string()),
|
|
||||||
unit: Some("GB".to_string()),
|
|
||||||
});
|
|
||||||
|
|
||||||
// Disk space metrics for backup directory
|
|
||||||
if let Some(ref disk_space) = backup_status.disk_space {
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_disk_total_gb".to_string(),
|
|
||||||
value: MetricValue::Float(disk_space.total_gb as f32),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some("Total disk space available for backups".to_string()),
|
|
||||||
unit: Some("GB".to_string()),
|
|
||||||
});
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_disk_used_gb".to_string(),
|
|
||||||
value: MetricValue::Float(disk_space.used_gb as f32),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some("Used disk space on backup drive".to_string()),
|
|
||||||
unit: Some("GB".to_string()),
|
|
||||||
});
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_disk_available_gb".to_string(),
|
|
||||||
value: MetricValue::Float(disk_space.available_gb as f32),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some("Available disk space on backup drive".to_string()),
|
|
||||||
unit: Some("GB".to_string()),
|
|
||||||
});
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_disk_usage_percent".to_string(),
|
|
||||||
value: MetricValue::Float(disk_space.usage_percent as f32),
|
|
||||||
status: if disk_space.usage_percent >= 95.0 {
|
|
||||||
Status::Critical
|
|
||||||
} else if disk_space.usage_percent >= 85.0 {
|
|
||||||
Status::Warning
|
|
||||||
} else {
|
|
||||||
Status::Ok
|
|
||||||
},
|
|
||||||
timestamp,
|
|
||||||
description: Some("Backup disk usage percentage".to_string()),
|
|
||||||
unit: Some("percent".to_string()),
|
|
||||||
});
|
|
||||||
|
|
||||||
// Add disk identification metrics if available from disk_space
|
|
||||||
if let Some(ref product_name) = disk_space.product_name {
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_disk_product_name".to_string(),
|
|
||||||
value: MetricValue::String(product_name.clone()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some("Backup disk product name from SMART data".to_string()),
|
|
||||||
unit: None,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(ref serial_number) = disk_space.serial_number {
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_disk_serial_number".to_string(),
|
|
||||||
value: MetricValue::String(serial_number.clone()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some("Backup disk serial number from SMART data".to_string()),
|
|
||||||
unit: None,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add standalone disk identification metrics from TOML fields
|
|
||||||
if let Some(ref product_name) = backup_status.disk_product_name {
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_disk_product_name".to_string(),
|
|
||||||
value: MetricValue::String(product_name.clone()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some("Backup disk product name from SMART data".to_string()),
|
|
||||||
unit: None,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(ref serial_number) = backup_status.disk_serial_number {
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "backup_disk_serial_number".to_string(),
|
|
||||||
value: MetricValue::String(serial_number.clone()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some("Backup disk serial number from SMART data".to_string()),
|
|
||||||
unit: None,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Count services by status
|
|
||||||
let mut status_counts = HashMap::new();
|
|
||||||
for service in backup_status.services.values() {
|
|
||||||
*status_counts.entry(service.status.clone()).or_insert(0) += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (status_name, count) in status_counts {
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("backup_services_{}_count", status_name),
|
|
||||||
value: MetricValue::Integer(count),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
description: Some(format!("Number of services with status: {}", status_name)),
|
|
||||||
unit: Some("services".to_string()),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(metrics)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// TOML structure for backup status file
|
/// TOML structure for backup status file
|
||||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct BackupStatusToml {
|
struct BackupStatusToml {
|
||||||
pub backup_name: String,
|
pub backup_name: String,
|
||||||
pub start_time: String,
|
pub start_time: String,
|
||||||
pub current_time: String,
|
pub current_time: String,
|
||||||
@ -408,11 +157,12 @@ pub struct BackupStatusToml {
|
|||||||
pub disk_space: Option<DiskSpace>,
|
pub disk_space: Option<DiskSpace>,
|
||||||
pub disk_product_name: Option<String>,
|
pub disk_product_name: Option<String>,
|
||||||
pub disk_serial_number: Option<String>,
|
pub disk_serial_number: Option<String>,
|
||||||
|
pub disk_wear_percent: Option<f32>,
|
||||||
pub services: HashMap<String, ServiceStatus>,
|
pub services: HashMap<String, ServiceStatus>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct DiskSpace {
|
struct DiskSpace {
|
||||||
pub total_bytes: u64,
|
pub total_bytes: u64,
|
||||||
pub used_bytes: u64,
|
pub used_bytes: u64,
|
||||||
pub available_bytes: u64,
|
pub available_bytes: u64,
|
||||||
@ -420,16 +170,13 @@ pub struct DiskSpace {
|
|||||||
pub used_gb: f64,
|
pub used_gb: f64,
|
||||||
pub available_gb: f64,
|
pub available_gb: f64,
|
||||||
pub usage_percent: f64,
|
pub usage_percent: f64,
|
||||||
// Optional disk identification fields
|
|
||||||
pub product_name: Option<String>,
|
|
||||||
pub serial_number: Option<String>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct ServiceStatus {
|
struct ServiceStatus {
|
||||||
pub status: String,
|
pub status: String,
|
||||||
pub exit_code: i64,
|
pub exit_code: i64,
|
||||||
pub repo_path: String,
|
pub repo_path: String,
|
||||||
pub archive_count: i64,
|
pub archive_count: i64,
|
||||||
pub repo_size_bytes: u64,
|
pub repo_size_bytes: u64,
|
||||||
}
|
}
|
||||||
@ -1,5 +1,5 @@
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use cm_dashboard_shared::{registry, Metric, MetricValue, Status, StatusTracker, HysteresisThresholds};
|
use cm_dashboard_shared::{AgentData, Status, HysteresisThresholds};
|
||||||
|
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
@ -38,19 +38,31 @@ impl CpuCollector {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate CPU load status using hysteresis thresholds
|
/// Calculate CPU load status using thresholds
|
||||||
fn calculate_load_status(&self, metric_name: &str, load: f32, status_tracker: &mut StatusTracker) -> Status {
|
fn calculate_load_status(&self, load: f32) -> Status {
|
||||||
status_tracker.calculate_with_hysteresis(metric_name, load, &self.load_thresholds)
|
if load >= self.load_thresholds.critical_high {
|
||||||
|
Status::Critical
|
||||||
|
} else if load >= self.load_thresholds.warning_high {
|
||||||
|
Status::Warning
|
||||||
|
} else {
|
||||||
|
Status::Ok
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate CPU temperature status using hysteresis thresholds
|
/// Calculate CPU temperature status using thresholds
|
||||||
fn calculate_temperature_status(&self, metric_name: &str, temp: f32, status_tracker: &mut StatusTracker) -> Status {
|
fn calculate_temperature_status(&self, temp: f32) -> Status {
|
||||||
status_tracker.calculate_with_hysteresis(metric_name, temp, &self.temperature_thresholds)
|
if temp >= self.temperature_thresholds.critical_high {
|
||||||
|
Status::Critical
|
||||||
|
} else if temp >= self.temperature_thresholds.warning_high {
|
||||||
|
Status::Warning
|
||||||
|
} else {
|
||||||
|
Status::Ok
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Collect CPU load averages from /proc/loadavg
|
/// Collect CPU load averages and populate AgentData
|
||||||
/// Format: "0.52 0.58 0.59 1/257 12345"
|
/// Format: "0.52 0.58 0.59 1/257 12345"
|
||||||
async fn collect_load_averages(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
|
async fn collect_load_averages(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
let content = utils::read_proc_file("/proc/loadavg")?;
|
let content = utils::read_proc_file("/proc/loadavg")?;
|
||||||
let parts: Vec<&str> = content.trim().split_whitespace().collect();
|
let parts: Vec<&str> = content.trim().split_whitespace().collect();
|
||||||
|
|
||||||
@ -65,53 +77,25 @@ impl CpuCollector {
|
|||||||
let load_5min = utils::parse_f32(parts[1])?;
|
let load_5min = utils::parse_f32(parts[1])?;
|
||||||
let load_15min = utils::parse_f32(parts[2])?;
|
let load_15min = utils::parse_f32(parts[2])?;
|
||||||
|
|
||||||
// Only apply thresholds to 5-minute load average
|
// Populate CPU data directly
|
||||||
let load_1min_status = Status::Ok; // No alerting on 1min
|
agent_data.system.cpu.load_1min = load_1min;
|
||||||
let load_5min_status = self.calculate_load_status(registry::CPU_LOAD_5MIN, load_5min, status_tracker); // Only 5min triggers alerts
|
agent_data.system.cpu.load_5min = load_5min;
|
||||||
let load_15min_status = Status::Ok; // No alerting on 15min
|
agent_data.system.cpu.load_15min = load_15min;
|
||||||
|
|
||||||
Ok(vec![
|
Ok(())
|
||||||
Metric::new(
|
|
||||||
registry::CPU_LOAD_1MIN.to_string(),
|
|
||||||
MetricValue::Float(load_1min),
|
|
||||||
load_1min_status,
|
|
||||||
)
|
|
||||||
.with_description("CPU load average over 1 minute".to_string()),
|
|
||||||
Metric::new(
|
|
||||||
registry::CPU_LOAD_5MIN.to_string(),
|
|
||||||
MetricValue::Float(load_5min),
|
|
||||||
load_5min_status,
|
|
||||||
)
|
|
||||||
.with_description("CPU load average over 5 minutes".to_string()),
|
|
||||||
Metric::new(
|
|
||||||
registry::CPU_LOAD_15MIN.to_string(),
|
|
||||||
MetricValue::Float(load_15min),
|
|
||||||
load_15min_status,
|
|
||||||
)
|
|
||||||
.with_description("CPU load average over 15 minutes".to_string()),
|
|
||||||
])
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Collect CPU temperature from thermal zones
|
/// Collect CPU temperature and populate AgentData
|
||||||
/// Prioritizes x86_pkg_temp over generic thermal zones (legacy behavior)
|
/// Prioritizes x86_pkg_temp over generic thermal zones
|
||||||
async fn collect_temperature(&self, status_tracker: &mut StatusTracker) -> Result<Option<Metric>, CollectorError> {
|
async fn collect_temperature(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
// Try x86_pkg_temp first (Intel CPU package temperature)
|
// Try x86_pkg_temp first (Intel CPU package temperature)
|
||||||
if let Ok(temp) = self
|
if let Ok(temp) = self
|
||||||
.read_thermal_zone("/sys/class/thermal/thermal_zone0/temp")
|
.read_thermal_zone("/sys/class/thermal/thermal_zone0/temp")
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
let temp_celsius = temp as f32 / 1000.0;
|
let temp_celsius = temp as f32 / 1000.0;
|
||||||
let status = self.calculate_temperature_status(registry::CPU_TEMPERATURE_CELSIUS, temp_celsius, status_tracker);
|
agent_data.system.cpu.temperature_celsius = Some(temp_celsius);
|
||||||
|
return Ok(());
|
||||||
return Ok(Some(
|
|
||||||
Metric::new(
|
|
||||||
registry::CPU_TEMPERATURE_CELSIUS.to_string(),
|
|
||||||
MetricValue::Float(temp_celsius),
|
|
||||||
status,
|
|
||||||
)
|
|
||||||
.with_description("CPU package temperature".to_string())
|
|
||||||
.with_unit("°C".to_string()),
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback: try other thermal zones
|
// Fallback: try other thermal zones
|
||||||
@ -119,22 +103,14 @@ impl CpuCollector {
|
|||||||
let path = format!("/sys/class/thermal/thermal_zone{}/temp", zone_id);
|
let path = format!("/sys/class/thermal/thermal_zone{}/temp", zone_id);
|
||||||
if let Ok(temp) = self.read_thermal_zone(&path).await {
|
if let Ok(temp) = self.read_thermal_zone(&path).await {
|
||||||
let temp_celsius = temp as f32 / 1000.0;
|
let temp_celsius = temp as f32 / 1000.0;
|
||||||
let status = self.calculate_temperature_status(registry::CPU_TEMPERATURE_CELSIUS, temp_celsius, status_tracker);
|
agent_data.system.cpu.temperature_celsius = Some(temp_celsius);
|
||||||
|
return Ok(());
|
||||||
return Ok(Some(
|
|
||||||
Metric::new(
|
|
||||||
registry::CPU_TEMPERATURE_CELSIUS.to_string(),
|
|
||||||
MetricValue::Float(temp_celsius),
|
|
||||||
status,
|
|
||||||
)
|
|
||||||
.with_description(format!("CPU temperature from thermal_zone{}", zone_id))
|
|
||||||
.with_unit("°C".to_string()),
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!("No CPU temperature sensors found");
|
debug!("No CPU temperature sensors found");
|
||||||
Ok(None)
|
// Leave temperature as None if not available
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read temperature from thermal zone efficiently
|
/// Read temperature from thermal zone efficiently
|
||||||
@ -143,84 +119,160 @@ impl CpuCollector {
|
|||||||
utils::parse_u64(content.trim())
|
utils::parse_u64(content.trim())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Collect CPU frequency from /proc/cpuinfo or scaling governor
|
/// Collect static CPU information from /proc/cpuinfo (only once at startup)
|
||||||
async fn collect_frequency(&self) -> Result<Option<Metric>, CollectorError> {
|
async fn collect_cpu_info(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
// Try scaling frequency first (more accurate for current frequency)
|
let content = utils::read_proc_file("/proc/cpuinfo")?;
|
||||||
if let Ok(freq) =
|
|
||||||
utils::read_proc_file("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq")
|
|
||||||
{
|
|
||||||
if let Ok(freq_khz) = utils::parse_u64(freq.trim()) {
|
|
||||||
let freq_mhz = freq_khz as f32 / 1000.0;
|
|
||||||
|
|
||||||
return Ok(Some(
|
let mut model_name: Option<String> = None;
|
||||||
Metric::new(
|
let mut core_count: u32 = 0;
|
||||||
registry::CPU_FREQUENCY_MHZ.to_string(),
|
|
||||||
MetricValue::Float(freq_mhz),
|
for line in content.lines() {
|
||||||
Status::Ok, // Frequency doesn't have status thresholds
|
if line.starts_with("model name") {
|
||||||
)
|
if let Some(colon_pos) = line.find(':') {
|
||||||
.with_description("Current CPU frequency".to_string())
|
let full_name = line[colon_pos + 1..].trim();
|
||||||
.with_unit("MHz".to_string()),
|
// Extract just the model number (e.g., "i7-9700" from "Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz")
|
||||||
));
|
let model = Self::extract_cpu_model(full_name);
|
||||||
|
if model_name.is_none() {
|
||||||
|
model_name = Some(model);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if line.starts_with("processor") {
|
||||||
|
core_count += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback: parse /proc/cpuinfo for base frequency
|
agent_data.system.cpu.model_name = model_name;
|
||||||
if let Ok(content) = utils::read_proc_file("/proc/cpuinfo") {
|
if core_count > 0 {
|
||||||
for line in content.lines() {
|
agent_data.system.cpu.core_count = Some(core_count);
|
||||||
if line.starts_with("cpu MHz") {
|
}
|
||||||
if let Some(freq_str) = line.split(':').nth(1) {
|
|
||||||
if let Ok(freq_mhz) = utils::parse_f32(freq_str) {
|
Ok(())
|
||||||
return Ok(Some(
|
}
|
||||||
Metric::new(
|
|
||||||
registry::CPU_FREQUENCY_MHZ.to_string(),
|
/// Extract CPU model number from full model name
|
||||||
MetricValue::Float(freq_mhz),
|
/// Examples:
|
||||||
Status::Ok,
|
/// - "Intel(R) Core(TM) i7-9700 CPU @ 3.00GHz" -> "i7-9700"
|
||||||
)
|
/// - "12th Gen Intel(R) Core(TM) i7-12700K" -> "i7-12700K"
|
||||||
.with_description(
|
/// - "AMD Ryzen 9 5950X 16-Core Processor" -> "Ryzen 9 5950X"
|
||||||
"CPU base frequency from /proc/cpuinfo".to_string(),
|
fn extract_cpu_model(full_name: &str) -> String {
|
||||||
)
|
// Look for Intel Core patterns (both old and new gen): i3, i5, i7, i9
|
||||||
.with_unit("MHz".to_string()),
|
// Match pattern like "i7-12700K" or "i7-9700"
|
||||||
));
|
for prefix in &["i3-", "i5-", "i7-", "i9-"] {
|
||||||
|
if let Some(pos) = full_name.find(prefix) {
|
||||||
|
// Find end of model number (until space or end of string)
|
||||||
|
let after_prefix = &full_name[pos..];
|
||||||
|
let end = after_prefix.find(' ').unwrap_or(after_prefix.len());
|
||||||
|
return after_prefix[..end].to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look for AMD Ryzen pattern
|
||||||
|
if let Some(pos) = full_name.find("Ryzen") {
|
||||||
|
// Extract "Ryzen X XXXX" pattern
|
||||||
|
let after_ryzen = &full_name[pos..];
|
||||||
|
let parts: Vec<&str> = after_ryzen.split_whitespace().collect();
|
||||||
|
if parts.len() >= 3 {
|
||||||
|
return format!("{} {} {}", parts[0], parts[1], parts[2]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: return first 15 characters or full name if shorter
|
||||||
|
if full_name.len() > 15 {
|
||||||
|
full_name[..15].to_string()
|
||||||
|
} else {
|
||||||
|
full_name.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collect CPU C-state (idle depth) and populate AgentData with top 3 C-states by usage
|
||||||
|
async fn collect_cstate(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
|
// Read C-state usage from first CPU (representative of overall system)
|
||||||
|
// C-states indicate CPU idle depth: C1=light sleep, C6=deep sleep, C10=deepest
|
||||||
|
|
||||||
|
let mut cstate_times: Vec<(String, u64)> = Vec::new();
|
||||||
|
let mut total_time: u64 = 0;
|
||||||
|
|
||||||
|
// Collect all C-state times from CPU0
|
||||||
|
for state_num in 0..=10 {
|
||||||
|
let time_path = format!("/sys/devices/system/cpu/cpu0/cpuidle/state{}/time", state_num);
|
||||||
|
let name_path = format!("/sys/devices/system/cpu/cpu0/cpuidle/state{}/name", state_num);
|
||||||
|
|
||||||
|
if let Ok(time_str) = utils::read_proc_file(&time_path) {
|
||||||
|
if let Ok(time) = utils::parse_u64(time_str.trim()) {
|
||||||
|
if let Ok(name) = utils::read_proc_file(&name_path) {
|
||||||
|
let state_name = name.trim();
|
||||||
|
// Skip POLL state (not real idle)
|
||||||
|
if state_name != "POLL" && time > 0 {
|
||||||
|
// Extract "C" + digits pattern (C3, C10, etc.) to reduce JSON size
|
||||||
|
// Handles formats like "C3_ACPI", "C10_MWAIT", etc.
|
||||||
|
let clean_name = if let Some(c_pos) = state_name.find('C') {
|
||||||
|
let rest = &state_name[c_pos + 1..];
|
||||||
|
let digit_count = rest.chars().take_while(|c| c.is_ascii_digit()).count();
|
||||||
|
if digit_count > 0 {
|
||||||
|
state_name[c_pos..c_pos + 1 + digit_count].to_string()
|
||||||
|
} else {
|
||||||
|
state_name.to_string()
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
state_name.to_string()
|
||||||
|
};
|
||||||
|
cstate_times.push((clean_name, time));
|
||||||
|
total_time += time;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break; // Only need first CPU entry
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// No more states available
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!("CPU frequency not available");
|
// Sort by time descending to get top 3
|
||||||
Ok(None)
|
cstate_times.sort_by(|a, b| b.1.cmp(&a.1));
|
||||||
|
|
||||||
|
// Calculate percentages for top 3 and populate AgentData
|
||||||
|
agent_data.system.cpu.cstates = cstate_times
|
||||||
|
.iter()
|
||||||
|
.take(3)
|
||||||
|
.map(|(name, time)| {
|
||||||
|
let percent = if total_time > 0 {
|
||||||
|
(*time as f32 / total_time as f32) * 100.0
|
||||||
|
} else {
|
||||||
|
0.0
|
||||||
|
};
|
||||||
|
cm_dashboard_shared::CStateInfo {
|
||||||
|
name: name.clone(),
|
||||||
|
percent,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl Collector for CpuCollector {
|
impl Collector for CpuCollector {
|
||||||
|
async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
async fn collect(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
|
|
||||||
debug!("Collecting CPU metrics");
|
debug!("Collecting CPU metrics");
|
||||||
let start = std::time::Instant::now();
|
let start = std::time::Instant::now();
|
||||||
|
|
||||||
let mut metrics = Vec::with_capacity(5); // Pre-allocate for efficiency
|
// Collect static CPU info (only once at startup)
|
||||||
|
if agent_data.system.cpu.model_name.is_none() || agent_data.system.cpu.core_count.is_none() {
|
||||||
|
self.collect_cpu_info(agent_data).await?;
|
||||||
|
}
|
||||||
|
|
||||||
// Collect load averages (always available)
|
// Collect load averages (always available)
|
||||||
metrics.extend(self.collect_load_averages(status_tracker).await?);
|
self.collect_load_averages(agent_data).await?;
|
||||||
|
|
||||||
// Collect temperature (optional)
|
// Collect temperature (optional)
|
||||||
if let Some(temp_metric) = self.collect_temperature(status_tracker).await? {
|
self.collect_temperature(agent_data).await?;
|
||||||
metrics.push(temp_metric);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Collect frequency (optional)
|
// Collect C-state (CPU idle depth)
|
||||||
if let Some(freq_metric) = self.collect_frequency().await? {
|
self.collect_cstate(agent_data).await?;
|
||||||
metrics.push(freq_metric);
|
|
||||||
}
|
|
||||||
|
|
||||||
let duration = start.elapsed();
|
let duration = start.elapsed();
|
||||||
debug!(
|
debug!("CPU collection completed in {:?}", duration);
|
||||||
"CPU collection completed in {:?} with {} metrics",
|
|
||||||
duration,
|
|
||||||
metrics.len()
|
|
||||||
);
|
|
||||||
|
|
||||||
// Efficiency check: warn if collection takes too long
|
// Efficiency check: warn if collection takes too long
|
||||||
if duration.as_millis() > 1 {
|
if duration.as_millis() > 1 {
|
||||||
@ -230,10 +282,14 @@ impl Collector for CpuCollector {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store performance metrics
|
// Calculate status using thresholds (use 5-minute average for stability)
|
||||||
// Performance tracking handled by cache system
|
agent_data.system.cpu.load_status = self.calculate_load_status(agent_data.system.cpu.load_5min);
|
||||||
|
agent_data.system.cpu.temperature_status = if let Some(temp) = agent_data.system.cpu.temperature_celsius {
|
||||||
|
self.calculate_temperature_status(temp)
|
||||||
|
} else {
|
||||||
|
Status::Unknown
|
||||||
|
};
|
||||||
|
|
||||||
Ok(metrics)
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,596 +1,853 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use cm_dashboard_shared::{Metric, MetricValue, Status, StatusTracker, HysteresisThresholds};
|
use cm_dashboard_shared::{AgentData, DriveData, FilesystemData, PoolData, HysteresisThresholds, Status};
|
||||||
|
|
||||||
use crate::config::DiskConfig;
|
use crate::config::DiskConfig;
|
||||||
use std::process::Command;
|
use tokio::process::Command as TokioCommand;
|
||||||
use std::time::Instant;
|
use std::process::Command as StdCommand;
|
||||||
use tracing::debug;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use super::{Collector, CollectorError};
|
use super::{Collector, CollectorError};
|
||||||
|
|
||||||
/// Information about a storage pool (mount point with underlying drives)
|
/// Storage collector with clean architecture and structured data output
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
struct StoragePool {
|
|
||||||
name: String, // e.g., "steampool", "root"
|
|
||||||
mount_point: String, // e.g., "/mnt/steampool", "/"
|
|
||||||
filesystem: String, // e.g., "mergerfs", "ext4", "zfs", "btrfs"
|
|
||||||
storage_type: String, // e.g., "mergerfs", "single", "raid", "zfs"
|
|
||||||
size: String, // e.g., "2.5TB"
|
|
||||||
used: String, // e.g., "2.1TB"
|
|
||||||
available: String, // e.g., "400GB"
|
|
||||||
usage_percent: f32, // e.g., 85.0
|
|
||||||
underlying_drives: Vec<DriveInfo>, // Individual physical drives
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Information about an individual physical drive
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
struct DriveInfo {
|
|
||||||
device: String, // e.g., "sda", "nvme0n1"
|
|
||||||
health_status: String, // e.g., "PASSED", "FAILED"
|
|
||||||
temperature: Option<f32>, // e.g., 45.0°C
|
|
||||||
wear_level: Option<f32>, // e.g., 12.0% (for SSDs)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Disk usage collector for monitoring filesystem sizes
|
|
||||||
pub struct DiskCollector {
|
pub struct DiskCollector {
|
||||||
config: DiskConfig,
|
config: DiskConfig,
|
||||||
temperature_thresholds: HysteresisThresholds,
|
temperature_thresholds: HysteresisThresholds,
|
||||||
detected_devices: std::collections::HashMap<String, Vec<String>>, // mount_point -> devices
|
}
|
||||||
|
|
||||||
|
/// A physical drive with its filesystems
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct PhysicalDrive {
|
||||||
|
name: String, // e.g., "nvme0n1", "sda"
|
||||||
|
health: String, // SMART health status
|
||||||
|
filesystems: Vec<Filesystem>, // mounted filesystems on this drive
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A filesystem mounted on a drive
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct Filesystem {
|
||||||
|
mount_point: String, // e.g., "/", "/boot"
|
||||||
|
usage_percent: f32, // Usage percentage
|
||||||
|
used_bytes: u64, // Used bytes
|
||||||
|
total_bytes: u64, // Total bytes
|
||||||
|
}
|
||||||
|
|
||||||
|
/// MergerFS pool
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct MergerfsPool {
|
||||||
|
name: String, // e.g., "srv_media"
|
||||||
|
mount_point: String, // e.g., "/srv/media"
|
||||||
|
total_bytes: u64, // Pool total bytes
|
||||||
|
used_bytes: u64, // Pool used bytes
|
||||||
|
data_drives: Vec<PoolDrive>, // Data drives in pool
|
||||||
|
parity_drives: Vec<PoolDrive>, // Parity drives in pool
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Drive in a storage pool
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct PoolDrive {
|
||||||
|
name: String, // Drive name
|
||||||
|
mount_point: String, // e.g., "/mnt/disk1"
|
||||||
|
temperature_celsius: Option<f32>, // Drive temperature
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DiskCollector {
|
impl DiskCollector {
|
||||||
pub fn new(config: DiskConfig) -> Self {
|
pub fn new(config: DiskConfig) -> Self {
|
||||||
// Create hysteresis thresholds for disk temperature from config
|
let temperature_thresholds = HysteresisThresholds::new(
|
||||||
let temperature_thresholds = HysteresisThresholds::with_custom_gaps(
|
|
||||||
config.temperature_warning_celsius,
|
config.temperature_warning_celsius,
|
||||||
5.0, // 5°C gap for recovery
|
|
||||||
config.temperature_critical_celsius,
|
config.temperature_critical_celsius,
|
||||||
5.0, // 5°C gap for recovery
|
|
||||||
);
|
);
|
||||||
|
|
||||||
// Detect devices for all configured filesystems at startup
|
Self {
|
||||||
let mut detected_devices = std::collections::HashMap::new();
|
|
||||||
for fs_config in &config.filesystems {
|
|
||||||
if fs_config.monitor {
|
|
||||||
if let Ok(devices) = Self::detect_device_for_mount_point_static(&fs_config.mount_point) {
|
|
||||||
detected_devices.insert(fs_config.mount_point.clone(), devices);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Self {
|
|
||||||
config,
|
config,
|
||||||
temperature_thresholds,
|
temperature_thresholds,
|
||||||
detected_devices,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate disk temperature status using hysteresis thresholds
|
/// Collect all storage data and populate AgentData
|
||||||
fn calculate_temperature_status(&self, metric_name: &str, temperature: f32, status_tracker: &mut StatusTracker) -> Status {
|
async fn collect_storage_data(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
status_tracker.calculate_with_hysteresis(metric_name, temperature, &self.temperature_thresholds)
|
// Clear drives and pools to prevent duplicates when updating cached data
|
||||||
}
|
agent_data.system.storage.drives.clear();
|
||||||
|
agent_data.system.storage.pools.clear();
|
||||||
|
|
||||||
|
// Step 1: Get mount points and their backing devices
|
||||||
/// Get configured storage pools with individual drive information
|
let mount_devices = self.get_mount_devices().await?;
|
||||||
fn get_configured_storage_pools(&self) -> Result<Vec<StoragePool>> {
|
|
||||||
let mut storage_pools = Vec::new();
|
|
||||||
|
|
||||||
for fs_config in &self.config.filesystems {
|
|
||||||
if !fs_config.monitor {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get filesystem stats for the mount point
|
|
||||||
match self.get_filesystem_info(&fs_config.mount_point) {
|
|
||||||
Ok((total_bytes, used_bytes)) => {
|
|
||||||
let available_bytes = total_bytes - used_bytes;
|
|
||||||
let usage_percent = if total_bytes > 0 {
|
|
||||||
(used_bytes as f64 / total_bytes as f64) * 100.0
|
|
||||||
} else {
|
|
||||||
0.0
|
|
||||||
};
|
|
||||||
|
|
||||||
// Convert bytes to human-readable format
|
|
||||||
let size = self.bytes_to_human_readable(total_bytes);
|
|
||||||
let used = self.bytes_to_human_readable(used_bytes);
|
|
||||||
let available = self.bytes_to_human_readable(available_bytes);
|
|
||||||
|
|
||||||
// Get individual drive information using pre-detected devices
|
|
||||||
let device_names = self.detected_devices.get(&fs_config.mount_point).cloned().unwrap_or_default();
|
|
||||||
let underlying_drives = self.get_drive_info_for_devices(&device_names)?;
|
|
||||||
|
|
||||||
storage_pools.push(StoragePool {
|
|
||||||
name: fs_config.name.clone(),
|
|
||||||
mount_point: fs_config.mount_point.clone(),
|
|
||||||
filesystem: fs_config.fs_type.clone(),
|
|
||||||
storage_type: fs_config.storage_type.clone(),
|
|
||||||
size,
|
|
||||||
used,
|
|
||||||
available,
|
|
||||||
usage_percent: usage_percent as f32,
|
|
||||||
underlying_drives,
|
|
||||||
});
|
|
||||||
|
|
||||||
debug!(
|
|
||||||
"Storage pool '{}' ({}) at {} with {} detected drives",
|
|
||||||
fs_config.name, fs_config.storage_type, fs_config.mount_point, device_names.len()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
debug!(
|
|
||||||
"Failed to get filesystem info for storage pool '{}': {}",
|
|
||||||
fs_config.name, e
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(storage_pools)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get drive information for a list of device names
|
|
||||||
fn get_drive_info_for_devices(&self, device_names: &[String]) -> Result<Vec<DriveInfo>> {
|
|
||||||
let mut drives = Vec::new();
|
|
||||||
|
|
||||||
for device_name in device_names {
|
// Step 2: Get filesystem usage for each mount point using df
|
||||||
let device_path = format!("/dev/{}", device_name);
|
let mut filesystem_usage = self.get_filesystem_usage(&mount_devices).map_err(|e| CollectorError::Parse {
|
||||||
|
value: "filesystem usage".to_string(),
|
||||||
// Get SMART data for this drive
|
error: format!("Failed to get filesystem usage: {}", e),
|
||||||
let (health_status, temperature, wear_level) = self.get_smart_data(&device_path);
|
})?;
|
||||||
|
|
||||||
drives.push(DriveInfo {
|
|
||||||
device: device_name.clone(),
|
|
||||||
health_status: health_status.clone(),
|
|
||||||
temperature,
|
|
||||||
wear_level,
|
|
||||||
});
|
|
||||||
|
|
||||||
debug!(
|
|
||||||
"Drive info for {}: health={}, temp={:?}°C, wear={:?}%",
|
|
||||||
device_name, health_status, temperature, wear_level
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(drives)
|
// Step 2.5: Add MergerFS mount points that weren't in lsblk output
|
||||||
|
self.add_mergerfs_filesystem_usage(&mut filesystem_usage).map_err(|e| CollectorError::Parse {
|
||||||
|
value: "mergerfs filesystem usage".to_string(),
|
||||||
|
error: format!("Failed to get mergerfs filesystem usage: {}", e),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// Step 3: Detect MergerFS pools
|
||||||
|
let mergerfs_pools = self.detect_mergerfs_pools(&filesystem_usage).map_err(|e| CollectorError::Parse {
|
||||||
|
value: "mergerfs pools".to_string(),
|
||||||
|
error: format!("Failed to detect mergerfs pools: {}", e),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// Step 4: Group filesystems by physical drive (excluding mergerfs members)
|
||||||
|
let physical_drives = self.group_by_physical_drive(&mount_devices, &filesystem_usage, &mergerfs_pools).map_err(|e| CollectorError::Parse {
|
||||||
|
value: "physical drives".to_string(),
|
||||||
|
error: format!("Failed to group by physical drive: {}", e),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// Step 5: Get SMART data for all drives
|
||||||
|
let smart_data = self.get_smart_data_for_drives(&physical_drives, &mergerfs_pools).await;
|
||||||
|
|
||||||
|
// Step 6: Populate AgentData
|
||||||
|
self.populate_drives_data(&physical_drives, &smart_data, agent_data)?;
|
||||||
|
self.populate_pools_data(&mergerfs_pools, &smart_data, agent_data)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get SMART data for a drive (health, temperature, wear level)
|
/// Get block devices and their mount points using lsblk
|
||||||
fn get_smart_data(&self, device_path: &str) -> (String, Option<f32>, Option<f32>) {
|
async fn get_mount_devices(&self) -> Result<HashMap<String, String>, CollectorError> {
|
||||||
// Try to get SMART data using smartctl
|
use super::run_command_with_timeout;
|
||||||
let output = Command::new("sudo")
|
|
||||||
.arg("smartctl")
|
|
||||||
.arg("-a")
|
|
||||||
.arg(device_path)
|
|
||||||
.output();
|
|
||||||
|
|
||||||
match output {
|
|
||||||
Ok(result) if result.status.success() => {
|
|
||||||
let stdout = String::from_utf8_lossy(&result.stdout);
|
|
||||||
|
|
||||||
// Parse health status
|
|
||||||
let health = if stdout.contains("PASSED") {
|
|
||||||
"PASSED".to_string()
|
|
||||||
} else if stdout.contains("FAILED") {
|
|
||||||
"FAILED".to_string()
|
|
||||||
} else {
|
|
||||||
"UNKNOWN".to_string()
|
|
||||||
};
|
|
||||||
|
|
||||||
// Parse temperature (look for various temperature indicators)
|
|
||||||
let temperature = self.parse_temperature_from_smart(&stdout);
|
|
||||||
|
|
||||||
// Parse wear level (for SSDs)
|
|
||||||
let wear_level = self.parse_wear_level_from_smart(&stdout);
|
|
||||||
|
|
||||||
(health, temperature, wear_level)
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
debug!("Failed to get SMART data for {}", device_path);
|
|
||||||
("UNKNOWN".to_string(), None, None)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parse temperature from SMART output
|
let mut cmd = TokioCommand::new("lsblk");
|
||||||
fn parse_temperature_from_smart(&self, smart_output: &str) -> Option<f32> {
|
cmd.args(&["-rn", "-o", "NAME,MOUNTPOINT"]);
|
||||||
for line in smart_output.lines() {
|
|
||||||
// Look for temperature in various formats
|
|
||||||
if line.contains("Temperature_Celsius") || line.contains("Temperature") {
|
|
||||||
let parts: Vec<&str> = line.split_whitespace().collect();
|
|
||||||
if parts.len() >= 10 {
|
|
||||||
if let Ok(temp) = parts[9].parse::<f32>() {
|
|
||||||
return Some(temp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// NVMe drives might show temperature differently
|
|
||||||
if line.contains("temperature:") {
|
|
||||||
if let Some(temp_part) = line.split("temperature:").nth(1) {
|
|
||||||
if let Some(temp_str) = temp_part.split_whitespace().next() {
|
|
||||||
if let Ok(temp) = temp_str.parse::<f32>() {
|
|
||||||
return Some(temp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parse wear level from SMART output (SSD wear leveling)
|
let output = run_command_with_timeout(cmd, 10).await
|
||||||
/// Supports both NVMe and SATA SSD wear indicators
|
.map_err(|e| CollectorError::SystemRead {
|
||||||
fn parse_wear_level_from_smart(&self, smart_output: &str) -> Option<f32> {
|
path: "block devices".to_string(),
|
||||||
for line in smart_output.lines() {
|
error: e.to_string(),
|
||||||
let line = line.trim();
|
})?;
|
||||||
|
|
||||||
// NVMe drives - direct percentage used
|
let mut mount_devices = HashMap::new();
|
||||||
if line.contains("Percentage Used:") {
|
for line in String::from_utf8_lossy(&output.stdout).lines() {
|
||||||
if let Some(wear_part) = line.split("Percentage Used:").nth(1) {
|
|
||||||
if let Some(wear_str) = wear_part.split('%').next() {
|
|
||||||
if let Ok(wear) = wear_str.trim().parse::<f32>() {
|
|
||||||
return Some(wear);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// SATA SSD attributes - parse SMART table format
|
|
||||||
// Format: ID ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
|
|
||||||
let parts: Vec<&str> = line.split_whitespace().collect();
|
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||||
if parts.len() >= 10 {
|
if parts.len() >= 2 {
|
||||||
// SSD Life Left / Percent Lifetime Remaining (higher = less wear)
|
let device_name = parts[0];
|
||||||
if line.contains("SSD_Life_Left") || line.contains("Percent_Lifetime_Remain") {
|
let mount_point = parts[1];
|
||||||
if let Ok(remaining) = parts[3].parse::<f32>() { // VALUE column
|
|
||||||
return Some(100.0 - remaining); // Convert remaining to used
|
// Skip swap partitions and unmounted devices
|
||||||
}
|
if mount_point == "[SWAP]" || mount_point.is_empty() {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Media Wearout Indicator (lower = more wear, normalize to 0-100)
|
// Convert device name to full path
|
||||||
if line.contains("Media_Wearout_Indicator") {
|
let device_path = format!("/dev/{}", device_name);
|
||||||
if let Ok(remaining) = parts[3].parse::<f32>() { // VALUE column
|
mount_devices.insert(mount_point.to_string(), device_path);
|
||||||
return Some(100.0 - remaining); // Convert remaining to used
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wear Leveling Count (higher = less wear, but varies by manufacturer)
|
|
||||||
if line.contains("Wear_Leveling_Count") {
|
|
||||||
if let Ok(wear_count) = parts[3].parse::<f32>() { // VALUE column
|
|
||||||
// Most SSDs: 100 = new, decreases with wear
|
|
||||||
if wear_count <= 100.0 {
|
|
||||||
return Some(100.0 - wear_count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Total LBAs Written - calculate against typical endurance if available
|
|
||||||
// This is more complex and manufacturer-specific, so we skip for now
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None
|
|
||||||
|
Ok(mount_devices)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Convert bytes to human-readable format
|
/// Use df to get filesystem usage for mount points
|
||||||
fn bytes_to_human_readable(&self, bytes: u64) -> String {
|
fn get_filesystem_usage(&self, mount_devices: &HashMap<String, String>) -> anyhow::Result<HashMap<String, (u64, u64)>> {
|
||||||
const UNITS: &[&str] = &["B", "K", "M", "G", "T"];
|
let mut filesystem_usage = HashMap::new();
|
||||||
let mut size = bytes as f64;
|
|
||||||
let mut unit_index = 0;
|
for mount_point in mount_devices.keys() {
|
||||||
|
match self.get_filesystem_info(mount_point) {
|
||||||
while size >= 1024.0 && unit_index < UNITS.len() - 1 {
|
Ok((total, used)) => {
|
||||||
size /= 1024.0;
|
filesystem_usage.insert(mount_point.clone(), (total, used));
|
||||||
unit_index += 1;
|
}
|
||||||
|
Err(_e) => {
|
||||||
|
// Silently skip filesystems we can't read
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if unit_index == 0 {
|
Ok(filesystem_usage)
|
||||||
format!("{:.0}{}", size, UNITS[unit_index])
|
|
||||||
} else {
|
|
||||||
format!("{:.1}{}", size, UNITS[unit_index])
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Detect device backing a mount point using lsblk (static version for startup)
|
/// Add filesystem usage for MergerFS mount points that aren't in lsblk
|
||||||
fn detect_device_for_mount_point_static(mount_point: &str) -> Result<Vec<String>> {
|
fn add_mergerfs_filesystem_usage(&self, filesystem_usage: &mut HashMap<String, (u64, u64)>) -> anyhow::Result<()> {
|
||||||
let output = Command::new("lsblk")
|
let mounts_content = std::fs::read_to_string("/proc/mounts")
|
||||||
.args(&["-n", "-o", "NAME,MOUNTPOINT"])
|
.map_err(|e| anyhow::anyhow!("Failed to read /proc/mounts: {}", e))?;
|
||||||
.output()?;
|
|
||||||
|
|
||||||
if !output.status.success() {
|
for line in mounts_content.lines() {
|
||||||
return Ok(Vec::new());
|
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if parts.len() >= 3 && parts[2] == "fuse.mergerfs" {
|
||||||
|
let mount_point = parts[1].to_string();
|
||||||
|
|
||||||
|
// Only add if we don't already have usage data for this mount point
|
||||||
|
if !filesystem_usage.contains_key(&mount_point) {
|
||||||
|
if let Ok((total, used)) = self.get_filesystem_info(&mount_point) {
|
||||||
|
filesystem_usage.insert(mount_point, (total, used));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get filesystem info for a single mount point
|
||||||
|
fn get_filesystem_info(&self, mount_point: &str) -> Result<(u64, u64), CollectorError> {
|
||||||
|
let output = StdCommand::new("timeout")
|
||||||
|
.args(&["10", "df", "--block-size=1", mount_point])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| CollectorError::SystemRead {
|
||||||
|
path: format!("df {}", mount_point),
|
||||||
|
error: e.to_string(),
|
||||||
|
})?;
|
||||||
|
|
||||||
let output_str = String::from_utf8_lossy(&output.stdout);
|
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||||
|
|
||||||
for line in output_str.lines() {
|
|
||||||
let parts: Vec<&str> = line.split_whitespace().collect();
|
|
||||||
if parts.len() >= 2 && parts[1] == mount_point {
|
|
||||||
// Remove tree symbols and extract device name (e.g., "├─nvme0n1p2" -> "nvme0n1p2")
|
|
||||||
let device_name = parts[0]
|
|
||||||
.trim_start_matches('├')
|
|
||||||
.trim_start_matches('└')
|
|
||||||
.trim_start_matches('─')
|
|
||||||
.trim();
|
|
||||||
|
|
||||||
// Extract base device name (e.g., "nvme0n1p2" -> "nvme0n1")
|
|
||||||
if let Some(base_device) = Self::extract_base_device(device_name) {
|
|
||||||
return Ok(vec![base_device]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Vec::new())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extract base device name from partition (e.g., "nvme0n1p2" -> "nvme0n1", "sda1" -> "sda")
|
|
||||||
fn extract_base_device(device_name: &str) -> Option<String> {
|
|
||||||
// Handle NVMe devices (nvme0n1p1 -> nvme0n1)
|
|
||||||
if device_name.starts_with("nvme") {
|
|
||||||
if let Some(p_pos) = device_name.find('p') {
|
|
||||||
return Some(device_name[..p_pos].to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle traditional devices (sda1 -> sda)
|
|
||||||
if device_name.len() > 1 {
|
|
||||||
let chars: Vec<char> = device_name.chars().collect();
|
|
||||||
let mut end_idx = chars.len();
|
|
||||||
|
|
||||||
// Find where the device name ends and partition number begins
|
|
||||||
for (i, &c) in chars.iter().enumerate().rev() {
|
|
||||||
if !c.is_ascii_digit() {
|
|
||||||
end_idx = i + 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if end_idx > 0 && end_idx < chars.len() {
|
|
||||||
return Some(chars[..end_idx].iter().collect());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If no partition detected, return as-is
|
|
||||||
Some(device_name.to_string())
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Get filesystem info using df command
|
|
||||||
fn get_filesystem_info(&self, path: &str) -> Result<(u64, u64)> {
|
|
||||||
let output = Command::new("df")
|
|
||||||
.arg("--block-size=1")
|
|
||||||
.arg(path)
|
|
||||||
.output()?;
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
return Err(anyhow::anyhow!("df command failed for {}", path));
|
|
||||||
}
|
|
||||||
|
|
||||||
let output_str = String::from_utf8(output.stdout)?;
|
|
||||||
let lines: Vec<&str> = output_str.lines().collect();
|
let lines: Vec<&str> = output_str.lines().collect();
|
||||||
|
|
||||||
if lines.len() < 2 {
|
if lines.len() < 2 {
|
||||||
return Err(anyhow::anyhow!("Unexpected df output format"));
|
return Err(CollectorError::Parse {
|
||||||
|
value: output_str.to_string(),
|
||||||
|
error: "Expected at least 2 lines from df output".to_string(),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let fields: Vec<&str> = lines[1].split_whitespace().collect();
|
// Parse the data line (skip header)
|
||||||
if fields.len() < 4 {
|
let parts: Vec<&str> = lines[1].split_whitespace().collect();
|
||||||
return Err(anyhow::anyhow!("Unexpected df fields count"));
|
if parts.len() < 4 {
|
||||||
|
return Err(CollectorError::Parse {
|
||||||
|
value: lines[1].to_string(),
|
||||||
|
error: "Expected at least 4 fields in df output".to_string(),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let total_bytes = fields[1].parse::<u64>()?;
|
let total_bytes: u64 = parts[1].parse().map_err(|e| CollectorError::Parse {
|
||||||
let used_bytes = fields[2].parse::<u64>()?;
|
value: parts[1].to_string(),
|
||||||
|
error: format!("Failed to parse total bytes: {}", e),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let used_bytes: u64 = parts[2].parse().map_err(|e| CollectorError::Parse {
|
||||||
|
value: parts[2].to_string(),
|
||||||
|
error: format!("Failed to parse used bytes: {}", e),
|
||||||
|
})?;
|
||||||
|
|
||||||
Ok((total_bytes, used_bytes))
|
Ok((total_bytes, used_bytes))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Detect MergerFS pools from mount data
|
||||||
|
fn detect_mergerfs_pools(&self, filesystem_usage: &HashMap<String, (u64, u64)>) -> anyhow::Result<Vec<MergerfsPool>> {
|
||||||
|
let mounts_content = std::fs::read_to_string("/proc/mounts")
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to read /proc/mounts: {}", e))?;
|
||||||
|
let mut pools = Vec::new();
|
||||||
|
|
||||||
|
for line in mounts_content.lines() {
|
||||||
|
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if parts.len() >= 3 && parts[2] == "fuse.mergerfs" {
|
||||||
|
let mount_point = parts[1].to_string();
|
||||||
|
let device_sources = parts[0]; // e.g., "/mnt/disk1:/mnt/disk2"
|
||||||
|
|
||||||
|
// Get pool usage
|
||||||
|
let (total_bytes, used_bytes) = filesystem_usage.get(&mount_point)
|
||||||
|
.copied()
|
||||||
|
.unwrap_or((0, 0));
|
||||||
|
|
||||||
|
// Extract pool name from mount point (e.g., "/srv/media" -> "srv_media")
|
||||||
|
let pool_name = if mount_point == "/" {
|
||||||
|
"root".to_string()
|
||||||
|
} else {
|
||||||
|
mount_point.trim_start_matches('/').replace('/', "_")
|
||||||
|
};
|
||||||
|
|
||||||
/// Parse size string (e.g., "120G", "45M") to GB value
|
if pool_name.is_empty() {
|
||||||
fn parse_size_to_gb(&self, size_str: &str) -> f32 {
|
continue;
|
||||||
let size_str = size_str.trim();
|
}
|
||||||
if size_str.is_empty() || size_str == "-" {
|
|
||||||
return 0.0;
|
// Parse member paths - handle both full paths and numeric references
|
||||||
}
|
let raw_paths: Vec<String> = device_sources
|
||||||
|
.split(':')
|
||||||
// Extract numeric part and unit
|
.map(|s| s.trim().to_string())
|
||||||
let (num_str, unit) = if let Some(last_char) = size_str.chars().last() {
|
.filter(|s| !s.is_empty())
|
||||||
if last_char.is_alphabetic() {
|
.collect();
|
||||||
let num_part = &size_str[..size_str.len() - 1];
|
|
||||||
let unit_part = &size_str[size_str.len() - 1..];
|
// Convert numeric references to actual mount points if needed
|
||||||
(num_part, unit_part)
|
let member_paths = if raw_paths.iter().any(|path| !path.starts_with('/')) {
|
||||||
} else {
|
// Handle numeric format like "1:2" by finding corresponding /mnt/disk* paths
|
||||||
(size_str, "")
|
self.resolve_numeric_mergerfs_paths(&raw_paths)?
|
||||||
|
} else {
|
||||||
|
// Already full paths
|
||||||
|
raw_paths
|
||||||
|
};
|
||||||
|
|
||||||
|
// For SnapRAID setups, include parity drives that are related to this pool's data drives
|
||||||
|
let mut all_member_paths = member_paths.clone();
|
||||||
|
let related_parity_paths = self.discover_related_parity_drives(&member_paths)?;
|
||||||
|
all_member_paths.extend(related_parity_paths);
|
||||||
|
|
||||||
|
// Categorize as data vs parity drives
|
||||||
|
let (data_drives, parity_drives) = match self.categorize_pool_drives(&all_member_paths) {
|
||||||
|
Ok(drives) => drives,
|
||||||
|
Err(_e) => {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
pools.push(MergerfsPool {
|
||||||
|
name: pool_name,
|
||||||
|
mount_point,
|
||||||
|
total_bytes,
|
||||||
|
used_bytes,
|
||||||
|
data_drives,
|
||||||
|
parity_drives,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
(size_str, "")
|
|
||||||
};
|
|
||||||
|
|
||||||
let number: f32 = num_str.parse().unwrap_or(0.0);
|
|
||||||
|
|
||||||
match unit.to_uppercase().as_str() {
|
|
||||||
"T" | "TB" => number * 1024.0,
|
|
||||||
"G" | "GB" => number,
|
|
||||||
"M" | "MB" => number / 1024.0,
|
|
||||||
"K" | "KB" => number / (1024.0 * 1024.0),
|
|
||||||
"B" | "" => number / (1024.0 * 1024.0 * 1024.0),
|
|
||||||
_ => number, // Assume GB if unknown unit
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(pools)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Group filesystems by physical drive (excluding mergerfs members) - exact old logic
|
||||||
|
fn group_by_physical_drive(
|
||||||
|
&self,
|
||||||
|
mount_devices: &HashMap<String, String>,
|
||||||
|
filesystem_usage: &HashMap<String, (u64, u64)>,
|
||||||
|
mergerfs_pools: &[MergerfsPool]
|
||||||
|
) -> anyhow::Result<Vec<PhysicalDrive>> {
|
||||||
|
let mut drive_groups: HashMap<String, Vec<Filesystem>> = HashMap::new();
|
||||||
|
|
||||||
|
// Get all mergerfs member paths to exclude them - exactly like old code
|
||||||
|
let mut mergerfs_members = std::collections::HashSet::new();
|
||||||
|
for pool in mergerfs_pools {
|
||||||
|
for drive in &pool.data_drives {
|
||||||
|
mergerfs_members.insert(drive.mount_point.clone());
|
||||||
|
}
|
||||||
|
for drive in &pool.parity_drives {
|
||||||
|
mergerfs_members.insert(drive.mount_point.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Group filesystems by base device
|
||||||
|
for (mount_point, device) in mount_devices {
|
||||||
|
// Skip mergerfs member mounts
|
||||||
|
if mergerfs_members.contains(mount_point) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let base_device = self.extract_base_device(device);
|
||||||
|
|
||||||
|
if let Some((total, used)) = filesystem_usage.get(mount_point) {
|
||||||
|
let usage_percent = (*used as f32 / *total as f32) * 100.0;
|
||||||
|
|
||||||
|
let filesystem = Filesystem {
|
||||||
|
mount_point: mount_point.clone(), // Keep actual mount point like "/" and "/boot"
|
||||||
|
usage_percent,
|
||||||
|
used_bytes: *used,
|
||||||
|
total_bytes: *total,
|
||||||
|
};
|
||||||
|
|
||||||
|
drive_groups.entry(base_device).or_insert_with(Vec::new).push(filesystem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to PhysicalDrive structs
|
||||||
|
let mut physical_drives = Vec::new();
|
||||||
|
for (drive_name, filesystems) in drive_groups {
|
||||||
|
let physical_drive = PhysicalDrive {
|
||||||
|
name: drive_name,
|
||||||
|
health: "UNKNOWN".to_string(), // Will be updated with SMART data
|
||||||
|
filesystems,
|
||||||
|
};
|
||||||
|
physical_drives.push(physical_drive);
|
||||||
|
}
|
||||||
|
|
||||||
|
physical_drives.sort_by(|a, b| a.name.cmp(&b.name));
|
||||||
|
Ok(physical_drives)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract base device name from device path
|
||||||
|
fn extract_base_device(&self, device: &str) -> String {
|
||||||
|
// Extract base device name (e.g., "/dev/nvme0n1p1" -> "nvme0n1")
|
||||||
|
if let Some(dev_name) = device.strip_prefix("/dev/") {
|
||||||
|
// Remove partition numbers: nvme0n1p1 -> nvme0n1, sda1 -> sda
|
||||||
|
if let Some(pos) = dev_name.find('p') {
|
||||||
|
if dev_name[pos+1..].chars().all(char::is_numeric) {
|
||||||
|
return dev_name[..pos].to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Handle traditional naming: sda1 -> sda
|
||||||
|
let mut result = String::new();
|
||||||
|
for ch in dev_name.chars() {
|
||||||
|
if ch.is_ascii_digit() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
result.push(ch);
|
||||||
|
}
|
||||||
|
if !result.is_empty() {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
device.to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get SMART data for drives in parallel
|
||||||
|
async fn get_smart_data_for_drives(&self, physical_drives: &[PhysicalDrive], mergerfs_pools: &[MergerfsPool]) -> HashMap<String, SmartData> {
|
||||||
|
use futures::future::join_all;
|
||||||
|
|
||||||
|
// Collect all drive names
|
||||||
|
let mut all_drives = std::collections::HashSet::new();
|
||||||
|
for drive in physical_drives {
|
||||||
|
all_drives.insert(drive.name.clone());
|
||||||
|
}
|
||||||
|
for pool in mergerfs_pools {
|
||||||
|
for drive in &pool.data_drives {
|
||||||
|
all_drives.insert(drive.name.clone());
|
||||||
|
}
|
||||||
|
for drive in &pool.parity_drives {
|
||||||
|
all_drives.insert(drive.name.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect SMART data for all drives in parallel
|
||||||
|
let futures: Vec<_> = all_drives
|
||||||
|
.iter()
|
||||||
|
.map(|drive_name| {
|
||||||
|
let drive = drive_name.clone();
|
||||||
|
async move {
|
||||||
|
let result = self.get_smart_data(&drive).await;
|
||||||
|
(drive, result)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let results = join_all(futures).await;
|
||||||
|
|
||||||
|
// Build HashMap from results
|
||||||
|
let mut smart_data = HashMap::new();
|
||||||
|
for (drive_name, result) in results {
|
||||||
|
if let Ok(data) = result {
|
||||||
|
smart_data.insert(drive_name, data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
smart_data
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get SMART data for a single drive
|
||||||
|
async fn get_smart_data(&self, drive_name: &str) -> Result<SmartData, CollectorError> {
|
||||||
|
use super::run_command_with_timeout;
|
||||||
|
|
||||||
|
// Use direct smartctl (no sudo) - service has CAP_SYS_RAWIO and CAP_SYS_ADMIN capabilities
|
||||||
|
// For NVMe drives, specify device type explicitly
|
||||||
|
let mut cmd = TokioCommand::new("smartctl");
|
||||||
|
if drive_name.starts_with("nvme") {
|
||||||
|
cmd.args(&["-d", "nvme", "-a", &format!("/dev/{}", drive_name)]);
|
||||||
|
} else {
|
||||||
|
cmd.args(&["-a", &format!("/dev/{}", drive_name)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
let output = run_command_with_timeout(cmd, 15).await
|
||||||
|
.map_err(|e| CollectorError::SystemRead {
|
||||||
|
path: format!("SMART data for {}", drive_name),
|
||||||
|
error: e.to_string(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||||
|
|
||||||
|
// Note: smartctl returns non-zero exit codes for warnings (like exit code 32
|
||||||
|
// for "temperature was high in the past"), but the output data is still valid.
|
||||||
|
// Only check if we got any output at all, don't reject based on exit code.
|
||||||
|
if output_str.is_empty() {
|
||||||
|
return Ok(SmartData {
|
||||||
|
health: "UNKNOWN".to_string(),
|
||||||
|
serial_number: None,
|
||||||
|
temperature_celsius: None,
|
||||||
|
wear_percent: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut health = "UNKNOWN".to_string();
|
||||||
|
let mut serial_number = None;
|
||||||
|
let mut temperature = None;
|
||||||
|
let mut wear_percent = None;
|
||||||
|
|
||||||
|
for line in output_str.lines() {
|
||||||
|
if line.contains("SMART overall-health") {
|
||||||
|
if line.contains("PASSED") {
|
||||||
|
health = "PASSED".to_string();
|
||||||
|
} else if line.contains("FAILED") {
|
||||||
|
health = "FAILED".to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serial number parsing (both SATA and NVMe)
|
||||||
|
if line.contains("Serial Number:") {
|
||||||
|
if let Some(serial_part) = line.split("Serial Number:").nth(1) {
|
||||||
|
let serial_str = serial_part.trim();
|
||||||
|
if !serial_str.is_empty() {
|
||||||
|
// Take first whitespace-separated token
|
||||||
|
if let Some(serial) = serial_str.split_whitespace().next() {
|
||||||
|
serial_number = Some(serial.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Temperature parsing for different drive types
|
||||||
|
if line.contains("Temperature_Celsius") || line.contains("Airflow_Temperature_Cel") || line.contains("Temperature_Case") {
|
||||||
|
// Traditional SATA drives: attribute table format
|
||||||
|
if let Some(temp_str) = line.split_whitespace().nth(9) {
|
||||||
|
if let Ok(temp) = temp_str.parse::<f32>() {
|
||||||
|
temperature = Some(temp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if line.starts_with("Temperature:") {
|
||||||
|
// NVMe drives: simple "Temperature: 27 Celsius" format
|
||||||
|
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if parts.len() >= 2 {
|
||||||
|
if let Ok(temp) = parts[1].parse::<f32>() {
|
||||||
|
temperature = Some(temp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wear level parsing for SSDs
|
||||||
|
if line.contains("Media_Wearout_Indicator") {
|
||||||
|
// Media_Wearout_Indicator stores remaining life % in column 3 (VALUE)
|
||||||
|
if let Some(wear_str) = line.split_whitespace().nth(3) {
|
||||||
|
if let Ok(remaining) = wear_str.parse::<f32>() {
|
||||||
|
wear_percent = Some(100.0 - remaining); // Convert remaining life to wear
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if line.contains("Wear_Leveling_Count") || line.contains("SSD_Life_Left") {
|
||||||
|
// Other wear attributes store value in column 9 (RAW_VALUE)
|
||||||
|
if let Some(wear_str) = line.split_whitespace().nth(9) {
|
||||||
|
if let Ok(wear) = wear_str.parse::<f32>() {
|
||||||
|
wear_percent = Some(100.0 - wear); // Convert remaining life to wear
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// NVMe wear parsing: "Percentage Used: 1%"
|
||||||
|
if line.contains("Percentage Used:") {
|
||||||
|
if let Some(percent_part) = line.split("Percentage Used:").nth(1) {
|
||||||
|
if let Some(percent_str) = percent_part.split_whitespace().next() {
|
||||||
|
if let Some(percent_clean) = percent_str.strip_suffix('%') {
|
||||||
|
if let Ok(wear) = percent_clean.parse::<f32>() {
|
||||||
|
wear_percent = Some(wear);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(SmartData {
|
||||||
|
health,
|
||||||
|
serial_number,
|
||||||
|
temperature_celsius: temperature,
|
||||||
|
wear_percent,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Populate drives data into AgentData
|
||||||
|
fn populate_drives_data(&self, physical_drives: &[PhysicalDrive], smart_data: &HashMap<String, SmartData>, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
|
for drive in physical_drives {
|
||||||
|
let smart = smart_data.get(&drive.name);
|
||||||
|
|
||||||
|
let mut filesystems: Vec<FilesystemData> = drive.filesystems.iter().map(|fs| {
|
||||||
|
FilesystemData {
|
||||||
|
mount: fs.mount_point.clone(), // This preserves "/" and "/boot" correctly
|
||||||
|
usage_percent: fs.usage_percent,
|
||||||
|
used_gb: fs.used_bytes as f32 / (1024.0 * 1024.0 * 1024.0),
|
||||||
|
total_gb: fs.total_bytes as f32 / (1024.0 * 1024.0 * 1024.0),
|
||||||
|
usage_status: self.calculate_filesystem_usage_status(fs.usage_percent),
|
||||||
|
}
|
||||||
|
}).collect();
|
||||||
|
|
||||||
|
// Sort filesystems by mount point for consistent display order
|
||||||
|
filesystems.sort_by(|a, b| a.mount.cmp(&b.mount));
|
||||||
|
|
||||||
|
agent_data.system.storage.drives.push(DriveData {
|
||||||
|
name: drive.name.clone(),
|
||||||
|
serial_number: smart.and_then(|s| s.serial_number.clone()),
|
||||||
|
health: smart.map(|s| s.health.clone()).unwrap_or_else(|| drive.health.clone()),
|
||||||
|
temperature_celsius: smart.and_then(|s| s.temperature_celsius),
|
||||||
|
wear_percent: smart.and_then(|s| s.wear_percent),
|
||||||
|
filesystems,
|
||||||
|
temperature_status: smart.and_then(|s| s.temperature_celsius)
|
||||||
|
.map(|temp| self.calculate_temperature_status(temp))
|
||||||
|
.unwrap_or(Status::Unknown),
|
||||||
|
health_status: self.calculate_health_status(
|
||||||
|
smart.map(|s| s.health.as_str()).unwrap_or("UNKNOWN")
|
||||||
|
),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Populate pools data into AgentData
|
||||||
|
fn populate_pools_data(&self, mergerfs_pools: &[MergerfsPool], smart_data: &HashMap<String, SmartData>, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
|
for pool in mergerfs_pools {
|
||||||
|
// Calculate pool health and statuses based on member drive health
|
||||||
|
let (pool_health, health_status, usage_status, data_drive_data, parity_drive_data) = self.calculate_pool_health(pool, smart_data);
|
||||||
|
|
||||||
|
let pool_data = PoolData {
|
||||||
|
name: pool.name.clone(),
|
||||||
|
mount: pool.mount_point.clone(),
|
||||||
|
pool_type: format!("mergerfs ({}+{})", pool.data_drives.len(), pool.parity_drives.len()),
|
||||||
|
health: pool_health,
|
||||||
|
usage_percent: if pool.total_bytes > 0 {
|
||||||
|
(pool.used_bytes as f32 / pool.total_bytes as f32) * 100.0
|
||||||
|
} else { 0.0 },
|
||||||
|
used_gb: pool.used_bytes as f32 / (1024.0 * 1024.0 * 1024.0),
|
||||||
|
total_gb: pool.total_bytes as f32 / (1024.0 * 1024.0 * 1024.0),
|
||||||
|
data_drives: data_drive_data,
|
||||||
|
parity_drives: parity_drive_data,
|
||||||
|
health_status,
|
||||||
|
usage_status,
|
||||||
|
};
|
||||||
|
|
||||||
|
agent_data.system.storage.pools.push(pool_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate pool health based on member drive status
|
||||||
|
fn calculate_pool_health(&self, pool: &MergerfsPool, smart_data: &HashMap<String, SmartData>) -> (String, cm_dashboard_shared::Status, cm_dashboard_shared::Status, Vec<cm_dashboard_shared::PoolDriveData>, Vec<cm_dashboard_shared::PoolDriveData>) {
|
||||||
|
let mut failed_data = 0;
|
||||||
|
let mut failed_parity = 0;
|
||||||
|
|
||||||
|
// Process data drives
|
||||||
|
let data_drive_data: Vec<cm_dashboard_shared::PoolDriveData> = pool.data_drives.iter().map(|d| {
|
||||||
|
let smart = smart_data.get(&d.name);
|
||||||
|
let health = smart.map(|s| s.health.clone()).unwrap_or_else(|| "UNKNOWN".to_string());
|
||||||
|
let temperature = smart.and_then(|s| s.temperature_celsius).or(d.temperature_celsius);
|
||||||
|
|
||||||
|
if health == "FAILED" {
|
||||||
|
failed_data += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate drive statuses using config thresholds
|
||||||
|
let health_status = self.calculate_health_status(&health);
|
||||||
|
let temperature_status = temperature.map(|t| self.temperature_thresholds.evaluate(t)).unwrap_or(cm_dashboard_shared::Status::Unknown);
|
||||||
|
|
||||||
|
cm_dashboard_shared::PoolDriveData {
|
||||||
|
name: d.name.clone(),
|
||||||
|
serial_number: smart.and_then(|s| s.serial_number.clone()),
|
||||||
|
temperature_celsius: temperature,
|
||||||
|
health,
|
||||||
|
wear_percent: smart.and_then(|s| s.wear_percent),
|
||||||
|
health_status,
|
||||||
|
temperature_status,
|
||||||
|
}
|
||||||
|
}).collect();
|
||||||
|
|
||||||
|
// Process parity drives
|
||||||
|
let parity_drive_data: Vec<cm_dashboard_shared::PoolDriveData> = pool.parity_drives.iter().map(|d| {
|
||||||
|
let smart = smart_data.get(&d.name);
|
||||||
|
let health = smart.map(|s| s.health.clone()).unwrap_or_else(|| "UNKNOWN".to_string());
|
||||||
|
let temperature = smart.and_then(|s| s.temperature_celsius).or(d.temperature_celsius);
|
||||||
|
|
||||||
|
if health == "FAILED" {
|
||||||
|
failed_parity += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate drive statuses using config thresholds
|
||||||
|
let health_status = self.calculate_health_status(&health);
|
||||||
|
let temperature_status = temperature.map(|t| self.temperature_thresholds.evaluate(t)).unwrap_or(cm_dashboard_shared::Status::Unknown);
|
||||||
|
|
||||||
|
cm_dashboard_shared::PoolDriveData {
|
||||||
|
name: d.name.clone(),
|
||||||
|
serial_number: smart.and_then(|s| s.serial_number.clone()),
|
||||||
|
temperature_celsius: temperature,
|
||||||
|
health,
|
||||||
|
wear_percent: smart.and_then(|s| s.wear_percent),
|
||||||
|
health_status,
|
||||||
|
temperature_status,
|
||||||
|
}
|
||||||
|
}).collect();
|
||||||
|
|
||||||
|
// Calculate overall pool health string and status
|
||||||
|
// SnapRAID logic: can tolerate up to N parity drive failures (where N = number of parity drives)
|
||||||
|
// If data drives fail AND we've lost parity protection, that's critical
|
||||||
|
let (pool_health, health_status) = if failed_data == 0 && failed_parity == 0 {
|
||||||
|
("healthy".to_string(), cm_dashboard_shared::Status::Ok)
|
||||||
|
} else if failed_data == 0 && failed_parity > 0 {
|
||||||
|
// Parity failed but no data loss - degraded (reduced protection)
|
||||||
|
("degraded".to_string(), cm_dashboard_shared::Status::Warning)
|
||||||
|
} else if failed_data == 1 && failed_parity == 0 {
|
||||||
|
// One data drive failed, parity intact - degraded (recoverable)
|
||||||
|
("degraded".to_string(), cm_dashboard_shared::Status::Warning)
|
||||||
|
} else {
|
||||||
|
// Multiple data drives failed OR data+parity failed = data loss risk
|
||||||
|
("critical".to_string(), cm_dashboard_shared::Status::Critical)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Calculate pool usage status using config thresholds
|
||||||
|
let usage_percent = if pool.total_bytes > 0 {
|
||||||
|
(pool.used_bytes as f32 / pool.total_bytes as f32) * 100.0
|
||||||
|
} else { 0.0 };
|
||||||
|
|
||||||
|
let usage_status = if usage_percent >= self.config.usage_critical_percent {
|
||||||
|
cm_dashboard_shared::Status::Critical
|
||||||
|
} else if usage_percent >= self.config.usage_warning_percent {
|
||||||
|
cm_dashboard_shared::Status::Warning
|
||||||
|
} else {
|
||||||
|
cm_dashboard_shared::Status::Ok
|
||||||
|
};
|
||||||
|
|
||||||
|
(pool_health, health_status, usage_status, data_drive_data, parity_drive_data)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate filesystem usage status
|
||||||
|
fn calculate_filesystem_usage_status(&self, usage_percent: f32) -> Status {
|
||||||
|
// Use standard filesystem warning/critical thresholds
|
||||||
|
if usage_percent >= 95.0 {
|
||||||
|
Status::Critical
|
||||||
|
} else if usage_percent >= 85.0 {
|
||||||
|
Status::Warning
|
||||||
|
} else {
|
||||||
|
Status::Ok
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate drive temperature status
|
||||||
|
fn calculate_temperature_status(&self, temperature: f32) -> Status {
|
||||||
|
self.temperature_thresholds.evaluate(temperature)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate drive health status
|
||||||
|
fn calculate_health_status(&self, health: &str) -> Status {
|
||||||
|
match health {
|
||||||
|
"PASSED" => Status::Ok,
|
||||||
|
"FAILED" => Status::Critical,
|
||||||
|
_ => Status::Unknown,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Discover parity drives that are related to the given data drives
|
||||||
|
fn discover_related_parity_drives(&self, data_drives: &[String]) -> anyhow::Result<Vec<String>> {
|
||||||
|
let mount_devices = tokio::task::block_in_place(|| {
|
||||||
|
tokio::runtime::Handle::current().block_on(self.get_mount_devices())
|
||||||
|
}).map_err(|e| anyhow::anyhow!("Failed to get mount devices: {}", e))?;
|
||||||
|
|
||||||
|
let mut related_parity = Vec::new();
|
||||||
|
|
||||||
|
// Find parity drives that share the same parent directory as the data drives
|
||||||
|
for data_path in data_drives {
|
||||||
|
if let Some(parent_dir) = self.get_parent_directory(data_path) {
|
||||||
|
// Look for parity drives in the same parent directory
|
||||||
|
for (mount_point, _device) in &mount_devices {
|
||||||
|
if mount_point.contains("parity") && mount_point.starts_with(&parent_dir) {
|
||||||
|
if !related_parity.contains(mount_point) {
|
||||||
|
related_parity.push(mount_point.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(related_parity)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get parent directory of a mount path (e.g., "/mnt/disk1" -> "/mnt")
|
||||||
|
fn get_parent_directory(&self, path: &str) -> Option<String> {
|
||||||
|
if let Some(last_slash) = path.rfind('/') {
|
||||||
|
if last_slash > 0 {
|
||||||
|
return Some(path[..last_slash].to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Categorize pool member drives as data vs parity
|
||||||
|
fn categorize_pool_drives(&self, member_paths: &[String]) -> anyhow::Result<(Vec<PoolDrive>, Vec<PoolDrive>)> {
|
||||||
|
let mut data_drives = Vec::new();
|
||||||
|
let mut parity_drives = Vec::new();
|
||||||
|
|
||||||
|
for path in member_paths {
|
||||||
|
let drive_info = self.get_drive_info_for_path(path)?;
|
||||||
|
|
||||||
|
// Heuristic: if path contains "parity", it's parity
|
||||||
|
if path.to_lowercase().contains("parity") {
|
||||||
|
parity_drives.push(drive_info);
|
||||||
|
} else {
|
||||||
|
data_drives.push(drive_info);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((data_drives, parity_drives))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get drive information for a mount path
|
||||||
|
fn get_drive_info_for_path(&self, path: &str) -> anyhow::Result<PoolDrive> {
|
||||||
|
// Use lsblk to find the backing device with timeout
|
||||||
|
let output = StdCommand::new("timeout")
|
||||||
|
.args(&["10", "lsblk", "-rn", "-o", "NAME,MOUNTPOINT"])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to run lsblk: {}", e))?;
|
||||||
|
|
||||||
|
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let mut device = String::new();
|
||||||
|
|
||||||
|
for line in output_str.lines() {
|
||||||
|
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if parts.len() >= 2 && parts[1] == path {
|
||||||
|
device = parts[0].to_string();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if device.is_empty() {
|
||||||
|
return Err(anyhow::anyhow!("Could not find device for path {}", path));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract base device name (e.g., "sda1" -> "sda")
|
||||||
|
let base_device = self.extract_base_device(&format!("/dev/{}", device));
|
||||||
|
|
||||||
|
// Temperature will be filled in later from parallel SMART collection
|
||||||
|
// Don't collect it here to avoid sequential blocking with problematic async nesting
|
||||||
|
Ok(PoolDrive {
|
||||||
|
name: base_device,
|
||||||
|
mount_point: path.to_string(),
|
||||||
|
temperature_celsius: None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolve numeric mergerfs references like "1:2" to actual mount paths
|
||||||
|
fn resolve_numeric_mergerfs_paths(&self, numeric_refs: &[String]) -> anyhow::Result<Vec<String>> {
|
||||||
|
let mut resolved_paths = Vec::new();
|
||||||
|
|
||||||
|
// Get all mount points that look like /mnt/disk* or /mnt/parity*
|
||||||
|
let mount_devices = tokio::task::block_in_place(|| {
|
||||||
|
tokio::runtime::Handle::current().block_on(self.get_mount_devices())
|
||||||
|
}).map_err(|e| anyhow::anyhow!("Failed to get mount devices: {}", e))?;
|
||||||
|
|
||||||
|
let mut disk_mounts: Vec<String> = mount_devices.keys()
|
||||||
|
.filter(|path| path.starts_with("/mnt/disk") || path.starts_with("/mnt/parity"))
|
||||||
|
.cloned()
|
||||||
|
.collect();
|
||||||
|
disk_mounts.sort(); // Ensure consistent ordering
|
||||||
|
|
||||||
|
for num_ref in numeric_refs {
|
||||||
|
if let Ok(index) = num_ref.parse::<usize>() {
|
||||||
|
// Convert 1-based index to 0-based
|
||||||
|
if index > 0 && index <= disk_mounts.len() {
|
||||||
|
resolved_paths.push(disk_mounts[index - 1].clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: if we couldn't resolve, return the original paths
|
||||||
|
if resolved_paths.is_empty() {
|
||||||
|
resolved_paths = numeric_refs.to_vec();
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(resolved_paths)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl Collector for DiskCollector {
|
impl Collector for DiskCollector {
|
||||||
|
async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
async fn collect(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
|
self.collect_storage_data(agent_data).await
|
||||||
let start_time = Instant::now();
|
|
||||||
debug!("Collecting storage pool and individual drive metrics");
|
|
||||||
|
|
||||||
let mut metrics = Vec::new();
|
|
||||||
|
|
||||||
// Get configured storage pools with individual drive data
|
|
||||||
let storage_pools = match self.get_configured_storage_pools() {
|
|
||||||
Ok(pools) => {
|
|
||||||
debug!("Found {} storage pools", pools.len());
|
|
||||||
pools
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
debug!("Failed to get storage pools: {}", e);
|
|
||||||
Vec::new()
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Generate metrics for each storage pool and its underlying drives
|
|
||||||
for storage_pool in &storage_pools {
|
|
||||||
let timestamp = chrono::Utc::now().timestamp() as u64;
|
|
||||||
|
|
||||||
// Storage pool overall metrics
|
|
||||||
let pool_name = &storage_pool.name;
|
|
||||||
|
|
||||||
// Parse size strings to get actual values for calculations
|
|
||||||
let size_gb = self.parse_size_to_gb(&storage_pool.size);
|
|
||||||
let used_gb = self.parse_size_to_gb(&storage_pool.used);
|
|
||||||
let avail_gb = self.parse_size_to_gb(&storage_pool.available);
|
|
||||||
|
|
||||||
// Calculate status based on configured thresholds
|
|
||||||
let pool_status = if storage_pool.usage_percent >= self.config.usage_critical_percent {
|
|
||||||
Status::Critical
|
|
||||||
} else if storage_pool.usage_percent >= self.config.usage_warning_percent {
|
|
||||||
Status::Warning
|
|
||||||
} else {
|
|
||||||
Status::Ok
|
|
||||||
};
|
|
||||||
|
|
||||||
// Storage pool info metrics
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("disk_{}_mount_point", pool_name),
|
|
||||||
value: MetricValue::String(storage_pool.mount_point.clone()),
|
|
||||||
unit: None,
|
|
||||||
description: Some(format!("Mount: {}", storage_pool.mount_point)),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("disk_{}_filesystem", pool_name),
|
|
||||||
value: MetricValue::String(storage_pool.filesystem.clone()),
|
|
||||||
unit: None,
|
|
||||||
description: Some(format!("FS: {}", storage_pool.filesystem)),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("disk_{}_storage_type", pool_name),
|
|
||||||
value: MetricValue::String(storage_pool.storage_type.clone()),
|
|
||||||
unit: None,
|
|
||||||
description: Some(format!("Type: {}", storage_pool.storage_type)),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Storage pool size metrics
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("disk_{}_total_gb", pool_name),
|
|
||||||
value: MetricValue::Float(size_gb),
|
|
||||||
unit: Some("GB".to_string()),
|
|
||||||
description: Some(format!("Total: {}", storage_pool.size)),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("disk_{}_used_gb", pool_name),
|
|
||||||
value: MetricValue::Float(used_gb),
|
|
||||||
unit: Some("GB".to_string()),
|
|
||||||
description: Some(format!("Used: {}", storage_pool.used)),
|
|
||||||
status: pool_status,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("disk_{}_available_gb", pool_name),
|
|
||||||
value: MetricValue::Float(avail_gb),
|
|
||||||
unit: Some("GB".to_string()),
|
|
||||||
description: Some(format!("Available: {}", storage_pool.available)),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("disk_{}_usage_percent", pool_name),
|
|
||||||
value: MetricValue::Float(storage_pool.usage_percent),
|
|
||||||
unit: Some("%".to_string()),
|
|
||||||
description: Some(format!("Usage: {:.1}%", storage_pool.usage_percent)),
|
|
||||||
status: pool_status,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Individual drive metrics for this storage pool
|
|
||||||
for drive in &storage_pool.underlying_drives {
|
|
||||||
// Drive health status
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("disk_{}_{}_health", pool_name, drive.device),
|
|
||||||
value: MetricValue::String(drive.health_status.clone()),
|
|
||||||
unit: None,
|
|
||||||
description: Some(format!("{}: {}", drive.device, drive.health_status)),
|
|
||||||
status: if drive.health_status == "PASSED" { Status::Ok }
|
|
||||||
else if drive.health_status == "FAILED" { Status::Critical }
|
|
||||||
else { Status::Unknown },
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Drive temperature
|
|
||||||
if let Some(temp) = drive.temperature {
|
|
||||||
let temp_status = self.calculate_temperature_status(
|
|
||||||
&format!("disk_{}_{}_temperature", pool_name, drive.device),
|
|
||||||
temp,
|
|
||||||
status_tracker
|
|
||||||
);
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("disk_{}_{}_temperature", pool_name, drive.device),
|
|
||||||
value: MetricValue::Float(temp),
|
|
||||||
unit: Some("°C".to_string()),
|
|
||||||
description: Some(format!("{}: {:.0}°C", drive.device, temp)),
|
|
||||||
status: temp_status,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Drive wear level (for SSDs)
|
|
||||||
if let Some(wear) = drive.wear_level {
|
|
||||||
let wear_status = if wear >= 90.0 { Status::Critical }
|
|
||||||
else if wear >= 80.0 { Status::Warning }
|
|
||||||
else { Status::Ok };
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("disk_{}_{}_wear_percent", pool_name, drive.device),
|
|
||||||
value: MetricValue::Float(wear),
|
|
||||||
unit: Some("%".to_string()),
|
|
||||||
description: Some(format!("{}: {:.0}% wear", drive.device, wear)),
|
|
||||||
status: wear_status,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add storage pool count metric
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "disk_count".to_string(),
|
|
||||||
value: MetricValue::Integer(storage_pools.len() as i64),
|
|
||||||
unit: None,
|
|
||||||
description: Some(format!("Total storage pools: {}", storage_pools.len())),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp: chrono::Utc::now().timestamp() as u64,
|
|
||||||
});
|
|
||||||
|
|
||||||
|
|
||||||
let collection_time = start_time.elapsed();
|
|
||||||
debug!(
|
|
||||||
"Multi-disk collection completed in {:?} with {} metrics",
|
|
||||||
collection_time,
|
|
||||||
metrics.len()
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(metrics)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// SMART data for a drive
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct SmartData {
|
||||||
|
health: String,
|
||||||
|
serial_number: Option<String>,
|
||||||
|
temperature_celsius: Option<f32>,
|
||||||
|
wear_percent: Option<f32>,
|
||||||
|
}
|
||||||
1327
agent/src/collectors/disk_old.rs
Normal file
1327
agent/src/collectors/disk_old.rs
Normal file
@ -0,0 +1,1327 @@
|
|||||||
|
use anyhow::Result;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use cm_dashboard_shared::{Metric, MetricValue, Status, StatusTracker, HysteresisThresholds};
|
||||||
|
|
||||||
|
use crate::config::DiskConfig;
|
||||||
|
use std::process::Command;
|
||||||
|
use std::time::Instant;
|
||||||
|
use std::fs;
|
||||||
|
use tracing::debug;
|
||||||
|
|
||||||
|
use super::{Collector, CollectorError};
|
||||||
|
|
||||||
|
/// Mount point information from a single `/proc/mounts` line.
///
/// For mergerfs mounts, `device` holds the colon-separated member list
/// (split apart by `parse_mergerfs_sources`).
#[derive(Debug, Clone)]
struct MountInfo {
    device: String,      // e.g., "/dev/sda1" or "/mnt/disk1:/mnt/disk2"
    mount_point: String, // e.g., "/", "/srv/media"
    fs_type: String,     // e.g., "ext4", "xfs", "fuse.mergerfs"
}
|
||||||
|
|
||||||
|
/// Auto-discovered storage topology, built by `auto_discover_storage`
/// from `/proc/mounts`.
#[derive(Debug, Clone)]
struct StorageTopology {
    // Plain filesystem mounts that are not members of any mergerfs pool.
    single_disks: Vec<MountInfo>,
    // Discovered mergerfs pools with their data/parity member mount points.
    mergerfs_pools: Vec<MergerfsPoolInfo>,
}
|
||||||
|
|
||||||
|
/// MergerFS pool information discovered from `/proc/mounts`.
///
/// Members are identified by mount point (not device name); devices are
/// resolved separately into `DiskCollector::detected_devices`.
#[derive(Debug, Clone)]
struct MergerfsPoolInfo {
    mount_point: String,       // e.g., "/srv/media"
    data_members: Vec<String>, // e.g., ["/mnt/disk1", "/mnt/disk2"]
    parity_disks: Vec<String>, // e.g., ["/mnt/parity"]
}
|
||||||
|
|
||||||
|
/// Information about a storage pool (mount point with underlying drives).
///
/// Size strings are pre-formatted by `bytes_to_human_readable`; callers
/// needing raw byte counts must query the filesystem again.
#[derive(Debug, Clone)]
struct StoragePool {
    name: String,               // e.g., "steampool", "root"
    mount_point: String,        // e.g., "/mnt/steampool", "/"; "(physical drive)" for drive-level pools
    filesystem: String,         // e.g., "mergerfs", "ext4", "zfs", "btrfs"
    pool_type: StoragePoolType, // Enhanced pool type with configuration
    size: String,               // e.g., "2.5TB"
    used: String,               // e.g., "2.1TB"
    available: String,          // e.g., "400GB"
    usage_percent: f32,         // e.g., 85.0
    underlying_drives: Vec<DriveInfo>, // Individual physical drives
    pool_health: PoolHealth,    // Overall pool health status
}
|
||||||
|
|
||||||
|
/// Enhanced storage pool types with specific configurations.
///
/// `RaidArray` and `ZfsPool` are placeholders for future support and are
/// currently only constructed by the legacy config path (RAID) or not at all.
#[derive(Debug, Clone)]
enum StoragePoolType {
    Single, // Traditional single disk (legacy)
    PhysicalDrive { // Physical drive with multiple filesystems
        filesystems: Vec<String>, // Mount points on this drive
    },
    MergerfsPool { // MergerFS with optional parity
        data_disks: Vec<String>,   // Member disk names (sdb, sdd)
        parity_disks: Vec<String>, // Parity disk names (sdc)
    },
    #[allow(dead_code)]
    RaidArray { // Hardware RAID (future)
        level: String, // "RAID1", "RAID5", etc.
        member_disks: Vec<String>,
        spare_disks: Vec<String>,
    },
    #[allow(dead_code)]
    ZfsPool { // ZFS pool (future)
        pool_name: String,
        vdevs: Vec<String>,
    }
}
|
||||||
|
|
||||||
|
/// Pool health status for redundant storage.
///
/// Derived from per-drive SMART health (see `calculate_pool_health` and
/// `calculate_mergerfs_pool_health`).
#[derive(Debug, Clone, Copy, PartialEq)]
enum PoolHealth {
    Healthy,  // All drives OK, parity current
    Degraded, // One drive failed or parity outdated, still functional
    Critical, // Multiple failures, data at risk
    #[allow(dead_code)]
    Rebuilding, // Actively rebuilding/scrubbing (future: SnapRAID status integration)
    Unknown,  // Cannot determine status
}
|
||||||
|
|
||||||
|
/// Information about an individual physical drive.
///
/// Populated from `smartctl` output by `get_drive_info_for_devices`;
/// unknown values stay `None` / "UNKNOWN" rather than erroring.
#[derive(Debug, Clone)]
struct DriveInfo {
    device: String,           // e.g., "sda", "nvme0n1"
    health_status: String,    // e.g., "PASSED", "FAILED"
    temperature: Option<f32>, // e.g., 45.0°C
    wear_level: Option<f32>,  // e.g., 12.0% (for SSDs)
}
|
||||||
|
|
||||||
|
/// Disk usage collector for monitoring filesystem sizes.
///
/// Topology and device mapping are resolved once in `new`; collection
/// methods then read these cached fields instead of re-discovering.
pub struct DiskCollector {
    config: DiskConfig,
    // Hysteresis thresholds for disk temperature status transitions.
    temperature_thresholds: HysteresisThresholds,
    detected_devices: std::collections::HashMap<String, Vec<String>>, // mount_point -> devices
    storage_topology: Option<StorageTopology>, // Auto-discovered storage layout
}
|
||||||
|
|
||||||
|
impl DiskCollector {
|
||||||
|
/// Build a collector: set up temperature thresholds, auto-discover the
/// storage topology, and resolve backing devices for every mount point
/// of interest. Discovery failures degrade to the legacy config path
/// rather than erroring.
pub fn new(config: DiskConfig) -> Self {
    // Create hysteresis thresholds for disk temperature from config
    let temperature_thresholds = HysteresisThresholds::with_custom_gaps(
        config.temperature_warning_celsius,
        5.0, // 5°C gap for recovery
        config.temperature_critical_celsius,
        5.0, // 5°C gap for recovery
    );

    // Perform auto-discovery of storage topology
    let storage_topology = match Self::auto_discover_storage() {
        Ok(topology) => {
            debug!("Auto-discovered storage topology: {} single disks, {} mergerfs pools",
                topology.single_disks.len(), topology.mergerfs_pools.len());
            Some(topology)
        }
        Err(e) => {
            debug!("Failed to auto-discover storage topology: {}", e);
            None
        }
    };

    // Detect devices for discovered storage.
    // detect_device_for_mount_point_static is defined elsewhere in this file
    // (not in view); presumably it maps a mount point to device names.
    let mut detected_devices = std::collections::HashMap::new();
    if let Some(ref topology) = storage_topology {
        // Add single disks
        for disk in &topology.single_disks {
            if let Ok(devices) = Self::detect_device_for_mount_point_static(&disk.mount_point) {
                detected_devices.insert(disk.mount_point.clone(), devices);
            }
        }

        // Add mergerfs pools and their members
        for pool in &topology.mergerfs_pools {
            // Detect devices for the pool itself
            if let Ok(devices) = Self::detect_device_for_mount_point_static(&pool.mount_point) {
                detected_devices.insert(pool.mount_point.clone(), devices);
            }

            // Detect devices for member disks
            for member in &pool.data_members {
                if let Ok(devices) = Self::detect_device_for_mount_point_static(member) {
                    detected_devices.insert(member.clone(), devices);
                }
            }

            // Detect devices for parity disks
            for parity in &pool.parity_disks {
                if let Ok(devices) = Self::detect_device_for_mount_point_static(parity) {
                    detected_devices.insert(parity.clone(), devices);
                }
            }
        }
    } else {
        // Fallback: use legacy filesystem config detection
        for fs_config in &config.filesystems {
            if fs_config.monitor {
                if let Ok(devices) = Self::detect_device_for_mount_point_static(&fs_config.mount_point) {
                    detected_devices.insert(fs_config.mount_point.clone(), devices);
                }
            }
        }
    }

    Self {
        config,
        temperature_thresholds,
        detected_devices,
        storage_topology,
    }
}
|
||||||
|
|
||||||
|
/// Auto-discover storage topology by parsing system information.
///
/// Reads `/proc/mounts`, classifies each mount as a mergerfs pool, a
/// single disk, or something to ignore, and returns the resulting
/// `StorageTopology`.
///
/// NOTE(review): the mergerfs-membership check only sees pools discovered
/// earlier in the iteration. `/proc/mounts` is ordered by mount time, and
/// member disks are typically mounted BEFORE the mergerfs overlay — so
/// members may be misclassified as single disks. Confirm against a live
/// system before relying on this path.
fn auto_discover_storage() -> Result<StorageTopology> {
    let mounts = Self::parse_proc_mounts()?;
    let mut single_disks = Vec::new();
    let mut mergerfs_pools = Vec::new();

    // Filter out unwanted filesystem types and mount points
    let exclude_fs_types = ["tmpfs", "devtmpfs", "sysfs", "proc", "cgroup", "cgroup2", "devpts"];
    let exclude_mount_prefixes = ["/proc", "/sys", "/dev", "/tmp", "/run"];

    for mount in mounts {
        // Skip excluded filesystem types
        if exclude_fs_types.contains(&mount.fs_type.as_str()) {
            continue;
        }

        // Skip excluded mount point prefixes
        if exclude_mount_prefixes.iter().any(|prefix| mount.mount_point.starts_with(prefix)) {
            continue;
        }

        match mount.fs_type.as_str() {
            "fuse.mergerfs" => {
                // Parse mergerfs pool: the device field is the
                // colon-separated list of member mount points.
                let data_members = Self::parse_mergerfs_sources(&mount.device);
                let parity_disks = Self::detect_parity_disks(&data_members);

                mergerfs_pools.push(MergerfsPoolInfo {
                    mount_point: mount.mount_point.clone(),
                    data_members,
                    parity_disks,
                });

                debug!("Discovered mergerfs pool at {}", mount.mount_point);
            }
            "ext4" | "xfs" | "btrfs" | "ntfs" | "vfat" => {
                // Check if this mount is part of a mergerfs pool
                let is_mergerfs_member = mergerfs_pools.iter()
                    .any(|pool| pool.data_members.contains(&mount.mount_point) ||
                        pool.parity_disks.contains(&mount.mount_point));

                if !is_mergerfs_member {
                    debug!("Discovered single disk at {}", mount.mount_point);
                    single_disks.push(mount);
                }
            }
            _ => {
                debug!("Skipping unsupported filesystem type: {}", mount.fs_type);
            }
        }
    }

    Ok(StorageTopology {
        single_disks,
        mergerfs_pools,
    })
}
|
||||||
|
|
||||||
|
/// Parse /proc/mounts to get all mount information
|
||||||
|
fn parse_proc_mounts() -> Result<Vec<MountInfo>> {
|
||||||
|
let mounts_content = fs::read_to_string("/proc/mounts")?;
|
||||||
|
let mut mounts = Vec::new();
|
||||||
|
|
||||||
|
for line in mounts_content.lines() {
|
||||||
|
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if parts.len() >= 3 {
|
||||||
|
mounts.push(MountInfo {
|
||||||
|
device: parts[0].to_string(),
|
||||||
|
mount_point: parts[1].to_string(),
|
||||||
|
fs_type: parts[2].to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(mounts)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a mergerfs source string into its member paths.
///
/// MergerFS encodes its members as a colon-separated list, e.g.
/// "/mnt/disk1:/mnt/disk2:/mnt/disk3". Surrounding whitespace is trimmed
/// and empty segments are dropped.
fn parse_mergerfs_sources(source: &str) -> Vec<String> {
    let mut members = Vec::new();
    for segment in source.split(':') {
        let path = segment.trim();
        if !path.is_empty() {
            members.push(path.to_string());
        }
    }
    members
}
|
||||||
|
|
||||||
|
/// Detect potential parity disks based on data member heuristics.
///
/// Two heuristics, tried in order:
/// 1. any xfs/ext4 mount whose path contains "parity" (case-insensitive);
/// 2. if none found, any xfs/ext4 mount sharing the data members' common
///    mount pattern that is not itself a data member.
///
/// Returns an empty vector when neither heuristic matches or
/// `/proc/mounts` cannot be read (best-effort by design).
fn detect_parity_disks(data_members: &[String]) -> Vec<String> {
    let mut parity_disks = Vec::new();

    // Heuristic 1: Look for mount points with "parity" in the name
    if let Ok(mounts) = Self::parse_proc_mounts() {
        for mount in mounts {
            if mount.mount_point.to_lowercase().contains("parity") &&
                (mount.fs_type == "xfs" || mount.fs_type == "ext4") {
                debug!("Detected parity disk by name: {}", mount.mount_point);
                parity_disks.push(mount.mount_point);
            }
        }
    }

    // Heuristic 2: Look for sequential device pattern
    // If data members are /mnt/disk1, /mnt/disk2, look for /mnt/disk* that's not in data
    if parity_disks.is_empty() {
        if let Some(pattern) = Self::extract_mount_pattern(data_members) {
            // Re-reads /proc/mounts; cheap enough that caching isn't needed here.
            if let Ok(mounts) = Self::parse_proc_mounts() {
                for mount in mounts {
                    if mount.mount_point.starts_with(&pattern) &&
                        !data_members.contains(&mount.mount_point) &&
                        (mount.fs_type == "xfs" || mount.fs_type == "ext4") {
                        debug!("Detected parity disk by pattern: {}", mount.mount_point);
                        parity_disks.push(mount.mount_point);
                    }
                }
            }
        }
    }

    parity_disks
}
|
||||||
|
|
||||||
|
/// Extract the common mount-point prefix pattern shared by all data members.
///
/// For members like "/mnt/disk1" and "/mnt/disk2" this returns "/mnt/disk",
/// which the caller uses with `starts_with` to locate sibling disks (e.g.
/// "/mnt/disk3") that are not data members.
///
/// The previous implementation returned the parent directory ("/mnt/")
/// rather than the documented common prefix, so the caller's `starts_with`
/// check matched every mount under the same parent. This version computes
/// the true longest common prefix and trims trailing digits so a single
/// member "/mnt/disk1" still yields the generic "/mnt/disk".
///
/// Returns `None` when the member list is empty or nothing longer than "/"
/// is shared by every member.
fn extract_mount_pattern(data_members: &[String]) -> Option<String> {
    let first = data_members.first()?;

    // Longest common byte prefix across all members.
    let mut prefix_len = first.len();
    for member in &data_members[1..] {
        prefix_len = first
            .bytes()
            .zip(member.bytes())
            .take(prefix_len)
            .take_while(|(a, b)| a == b)
            .count();
    }

    // Back off to a valid UTF-8 character boundary before slicing.
    while prefix_len > 0 && !first.is_char_boundary(prefix_len) {
        prefix_len -= 1;
    }

    // Drop trailing digits so the pattern generalizes over disk numbers.
    let pattern = first[..prefix_len].trim_end_matches(|c: char| c.is_ascii_digit());

    // A bare "/" (or empty) prefix would match every mount point, which is
    // useless as a sibling-disk pattern.
    if pattern.len() > 1 {
        Some(pattern.to_string())
    } else {
        None
    }
}
|
||||||
|
|
||||||
|
/// Calculate disk temperature status using hysteresis thresholds.
///
/// Delegates to the shared `StatusTracker`, keyed by `metric_name` so each
/// disk's status is tracked independently; presumably the tracker applies
/// the 5°C recovery gaps configured in `new` — confirm in shared crate.
fn calculate_temperature_status(&self, metric_name: &str, temperature: f32, status_tracker: &mut StatusTracker) -> Status {
    status_tracker.calculate_with_hysteresis(metric_name, temperature, &self.temperature_thresholds)
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Get storage pools using auto-discovered topology or fallback to configuration
|
||||||
|
fn get_configured_storage_pools(&self) -> Result<Vec<StoragePool>> {
|
||||||
|
if let Some(ref topology) = self.storage_topology {
|
||||||
|
self.get_auto_discovered_storage_pools(topology)
|
||||||
|
} else {
|
||||||
|
self.get_legacy_configured_storage_pools()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get storage pools from auto-discovered topology.
///
/// Single-disk filesystems are grouped per physical drive (one pool per
/// drive); mergerfs pools remain logical pools carrying their member and
/// parity drives.
fn get_auto_discovered_storage_pools(&self, topology: &StorageTopology) -> Result<Vec<StoragePool>> {
    let mut storage_pools = Vec::new();

    // Group single disks by physical drive for unified pool display
    let grouped_disks = self.group_filesystems_by_physical_drive(&topology.single_disks)?;

    // Process grouped single disks (each physical drive becomes a pool)
    for (drive_name, filesystems) in grouped_disks {
        // Create a unified pool for this physical drive
        let pool = self.create_physical_drive_pool(&drive_name, &filesystems)?;
        storage_pools.push(pool);
    }

    // IMPORTANT: Do not create individual filesystem pools when using auto-discovery
    // All single disk filesystems should be grouped into physical drive pools above

    // Process mergerfs pools (these remain as logical pools)
    for pool_info in &topology.mergerfs_pools {
        if let Ok((total_bytes, used_bytes)) = self.get_filesystem_info(&pool_info.mount_point) {
            // NOTE(review): this u64 subtraction panics in debug builds if
            // used > total; create_physical_drive_pool uses saturating_sub —
            // consider the same here.
            let available_bytes = total_bytes - used_bytes;
            let usage_percent = if total_bytes > 0 {
                (used_bytes as f64 / total_bytes as f64) * 100.0
            } else { 0.0 };

            let size = self.bytes_to_human_readable(total_bytes);
            let used = self.bytes_to_human_readable(used_bytes);
            let available = self.bytes_to_human_readable(available_bytes);

            // Collect all member and parity drives
            let mut all_drives = Vec::new();

            // Add data member drives
            for member in &pool_info.data_members {
                if let Some(devices) = self.detected_devices.get(member) {
                    all_drives.extend(devices.clone());
                }
            }

            // Add parity drives
            for parity in &pool_info.parity_disks {
                if let Some(devices) = self.detected_devices.get(parity) {
                    all_drives.extend(devices.clone());
                }
            }

            let underlying_drives = self.get_drive_info_for_devices(&all_drives)?;

            // Calculate pool health
            let pool_health = self.calculate_mergerfs_pool_health(&pool_info.data_members, &pool_info.parity_disks, &underlying_drives);

            // Generate pool name from mount point (e.g. "/srv/media" -> "srv_media")
            let name = pool_info.mount_point.trim_start_matches('/').replace('/', "_");

            storage_pools.push(StoragePool {
                name,
                mount_point: pool_info.mount_point.clone(),
                filesystem: "fuse.mergerfs".to_string(),
                pool_type: StoragePoolType::MergerfsPool {
                    // Only the first detected device per member is recorded here.
                    data_disks: pool_info.data_members.iter()
                        .filter_map(|member| self.detected_devices.get(member).and_then(|devices| devices.first().cloned()))
                        .collect(),
                    parity_disks: pool_info.parity_disks.iter()
                        .filter_map(|parity| self.detected_devices.get(parity).and_then(|devices| devices.first().cloned()))
                        .collect(),
                },
                size,
                used,
                available,
                usage_percent: usage_percent as f32,
                underlying_drives,
                pool_health,
            });

            debug!("Auto-discovered mergerfs pool: {} with {} data + {} parity disks",
                pool_info.mount_point, pool_info.data_members.len(), pool_info.parity_disks.len());
        }
    }

    Ok(storage_pools)
}
|
||||||
|
|
||||||
|
/// Group filesystems by their backing physical drive.
///
/// Filesystems whose mount point has no detected device are silently
/// dropped from the grouping. `extract_base_device` is defined elsewhere
/// in this file (not in view); presumably it strips partition suffixes
/// (e.g. "sda1" -> "sda") — confirm before relying on grouping keys.
fn group_filesystems_by_physical_drive(&self, filesystems: &[MountInfo]) -> Result<std::collections::HashMap<String, Vec<MountInfo>>> {
    let mut grouped = std::collections::HashMap::new();

    for fs in filesystems {
        // Get the physical drive name for this mount point
        if let Some(devices) = self.detected_devices.get(&fs.mount_point) {
            if let Some(device_name) = devices.first() {
                // Extract base drive name from detected device
                let drive_name = Self::extract_base_device(device_name)
                    .unwrap_or_else(|| device_name.clone());

                debug!("Grouping filesystem {} (device: {}) under drive: {}",
                    fs.mount_point, device_name, drive_name);

                grouped.entry(drive_name).or_insert_with(Vec::new).push(fs.clone());
            }
        }
    }

    debug!("Filesystem grouping result: {} drives with filesystems: {:?}",
        grouped.len(),
        grouped.keys().collect::<Vec<_>>());

    Ok(grouped)
}
|
||||||
|
|
||||||
|
/// Create a physical drive pool containing multiple filesystems.
///
/// Sums capacity/usage across every filesystem on the drive, fetches
/// SMART data for the drive itself, and grades health as Healthy only
/// when all drives report "PASSED" (Critical otherwise).
///
/// # Errors
/// Returns an error when `filesystems` is empty or drive info lookup fails.
fn create_physical_drive_pool(&self, drive_name: &str, filesystems: &[MountInfo]) -> Result<StoragePool> {
    if filesystems.is_empty() {
        return Err(anyhow::anyhow!("No filesystems for drive {}", drive_name));
    }

    // Calculate total usage across all filesystems on this drive.
    // Filesystems whose stats cannot be read are skipped, not fatal.
    let mut total_capacity = 0u64;
    let mut total_used = 0u64;

    for fs in filesystems {
        if let Ok((capacity, used)) = self.get_filesystem_info(&fs.mount_point) {
            total_capacity += capacity;
            total_used += used;
        }
    }

    let total_available = total_capacity.saturating_sub(total_used);
    let usage_percent = if total_capacity > 0 {
        (total_used as f64 / total_capacity as f64) * 100.0
    } else { 0.0 };

    // Get drive information for SMART data
    let device_names = vec![drive_name.to_string()];
    let underlying_drives = self.get_drive_info_for_devices(&device_names)?;

    // Collect filesystem mount points for this drive
    let filesystem_mount_points: Vec<String> = filesystems.iter()
        .map(|fs| fs.mount_point.clone())
        .collect();

    Ok(StoragePool {
        name: drive_name.to_string(),
        mount_point: format!("(physical drive)"), // Special marker for physical drives
        filesystem: "physical".to_string(),
        pool_type: StoragePoolType::PhysicalDrive {
            filesystems: filesystem_mount_points,
        },
        size: self.bytes_to_human_readable(total_capacity),
        used: self.bytes_to_human_readable(total_used),
        available: self.bytes_to_human_readable(total_available),
        usage_percent: usage_percent as f32,
        pool_health: if underlying_drives.iter().all(|d| d.health_status == "PASSED") {
            PoolHealth::Healthy
        } else {
            PoolHealth::Critical
        },
        underlying_drives,
    })
}
|
||||||
|
|
||||||
|
/// Calculate pool health specifically for mergerfs pools
|
||||||
|
fn calculate_mergerfs_pool_health(&self, data_members: &[String], parity_disks: &[String], drives: &[DriveInfo]) -> PoolHealth {
|
||||||
|
// Get device names for data and parity drives
|
||||||
|
let mut data_device_names = Vec::new();
|
||||||
|
let mut parity_device_names = Vec::new();
|
||||||
|
|
||||||
|
for member in data_members {
|
||||||
|
if let Some(devices) = self.detected_devices.get(member) {
|
||||||
|
data_device_names.extend(devices.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for parity in parity_disks {
|
||||||
|
if let Some(devices) = self.detected_devices.get(parity) {
|
||||||
|
parity_device_names.extend(devices.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let failed_data = drives.iter()
|
||||||
|
.filter(|d| data_device_names.contains(&d.device) && d.health_status != "PASSED")
|
||||||
|
.count();
|
||||||
|
let failed_parity = drives.iter()
|
||||||
|
.filter(|d| parity_device_names.contains(&d.device) && d.health_status != "PASSED")
|
||||||
|
.count();
|
||||||
|
|
||||||
|
match (failed_data, failed_parity) {
|
||||||
|
(0, 0) => PoolHealth::Healthy,
|
||||||
|
(1, 0) => PoolHealth::Degraded, // Can recover with parity
|
||||||
|
(0, 1) => PoolHealth::Degraded, // Lost parity protection
|
||||||
|
_ => PoolHealth::Critical, // Multiple failures
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fallback to legacy configuration-based storage pools.
///
/// Walks the static filesystem configuration, skipping unmonitored
/// entries and pool members, deduplicating pools by mount point, and
/// building a `StoragePool` per remaining entry. Filesystems whose
/// stats cannot be read are logged and skipped.
fn get_legacy_configured_storage_pools(&self) -> Result<Vec<StoragePool>> {
    let mut storage_pools = Vec::new();
    let mut processed_pools = std::collections::HashSet::new();

    // Legacy implementation: use filesystem configuration
    for fs_config in &self.config.filesystems {
        if !fs_config.monitor {
            continue;
        }

        let (pool_type, skip_in_single_mode) = self.determine_pool_type(&fs_config.storage_type);

        // Skip member disks if they're part of a pool
        if skip_in_single_mode {
            continue;
        }

        // Check if this pool was already processed (in case of multiple member disks)
        let pool_key = match &pool_type {
            StoragePoolType::MergerfsPool { .. } => {
                // For mergerfs pools, use the main mount point
                if fs_config.fs_type == "fuse.mergerfs" {
                    fs_config.mount_point.clone()
                } else {
                    continue; // Skip member disks
                }
            }
            _ => fs_config.mount_point.clone()
        };

        if processed_pools.contains(&pool_key) {
            continue;
        }
        processed_pools.insert(pool_key.clone());

        // Get filesystem stats for the mount point
        match self.get_filesystem_info(&fs_config.mount_point) {
            Ok((total_bytes, used_bytes)) => {
                // NOTE(review): u64 subtraction panics in debug builds if
                // used > total; consider saturating_sub as used elsewhere.
                let available_bytes = total_bytes - used_bytes;
                let usage_percent = if total_bytes > 0 {
                    (used_bytes as f64 / total_bytes as f64) * 100.0
                } else { 0.0 };

                // Convert bytes to human-readable format
                let size = self.bytes_to_human_readable(total_bytes);
                let used = self.bytes_to_human_readable(used_bytes);
                let available = self.bytes_to_human_readable(available_bytes);

                // Get underlying drives based on pool type
                let underlying_drives = self.get_pool_drives(&pool_type, &fs_config.mount_point)?;

                // Calculate pool health
                let pool_health = self.calculate_pool_health(&pool_type, &underlying_drives);
                let drive_count = underlying_drives.len();

                storage_pools.push(StoragePool {
                    name: fs_config.name.clone(),
                    mount_point: fs_config.mount_point.clone(),
                    filesystem: fs_config.fs_type.clone(),
                    pool_type: pool_type.clone(),
                    size,
                    used,
                    available,
                    usage_percent: usage_percent as f32,
                    underlying_drives,
                    pool_health,
                });

                debug!(
                    "Legacy configured storage pool '{}' ({:?}) at {} with {} drives, health: {:?}",
                    fs_config.name, pool_type, fs_config.mount_point, drive_count, pool_health
                );
            }
            Err(e) => {
                debug!(
                    "Failed to get filesystem info for storage pool '{}': {}",
                    fs_config.name, e
                );
            }
        }
    }

    Ok(storage_pools)
}
|
||||||
|
|
||||||
|
/// Determine the storage pool type from configuration
|
||||||
|
fn determine_pool_type(&self, storage_type: &str) -> (StoragePoolType, bool) {
|
||||||
|
match storage_type {
|
||||||
|
"single" => (StoragePoolType::Single, false),
|
||||||
|
"mergerfs_pool" | "mergerfs" => {
|
||||||
|
// Find associated member disks
|
||||||
|
let data_disks = self.find_pool_member_disks("mergerfs_member");
|
||||||
|
let parity_disks = self.find_pool_member_disks("parity");
|
||||||
|
(StoragePoolType::MergerfsPool { data_disks, parity_disks }, false)
|
||||||
|
}
|
||||||
|
"mergerfs_member" => (StoragePoolType::Single, true), // Skip, part of pool
|
||||||
|
"parity" => (StoragePoolType::Single, true), // Skip, part of pool
|
||||||
|
"raid1" | "raid5" | "raid6" => {
|
||||||
|
let member_disks = self.find_pool_member_disks(&format!("{}_member", storage_type));
|
||||||
|
(StoragePoolType::RaidArray {
|
||||||
|
level: storage_type.to_uppercase(),
|
||||||
|
member_disks,
|
||||||
|
spare_disks: Vec::new()
|
||||||
|
}, false)
|
||||||
|
}
|
||||||
|
_ => (StoragePoolType::Single, false) // Default to single
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find member disks for a specific storage type
|
||||||
|
fn find_pool_member_disks(&self, member_type: &str) -> Vec<String> {
|
||||||
|
let mut member_disks = Vec::new();
|
||||||
|
|
||||||
|
for fs_config in &self.config.filesystems {
|
||||||
|
if fs_config.storage_type == member_type && fs_config.monitor {
|
||||||
|
// Get device names for this mount point
|
||||||
|
if let Some(devices) = self.detected_devices.get(&fs_config.mount_point) {
|
||||||
|
member_disks.extend(devices.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
member_disks
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get drive information for a specific pool type
|
||||||
|
fn get_pool_drives(&self, pool_type: &StoragePoolType, mount_point: &str) -> Result<Vec<DriveInfo>> {
|
||||||
|
match pool_type {
|
||||||
|
StoragePoolType::Single => {
|
||||||
|
// Single disk - use detected devices for this mount point
|
||||||
|
let device_names = self.detected_devices.get(mount_point).cloned().unwrap_or_default();
|
||||||
|
self.get_drive_info_for_devices(&device_names)
|
||||||
|
}
|
||||||
|
StoragePoolType::PhysicalDrive { .. } => {
|
||||||
|
// Physical drive - get drive info for the drive directly (mount_point not used)
|
||||||
|
let device_names = vec![mount_point.to_string()];
|
||||||
|
self.get_drive_info_for_devices(&device_names)
|
||||||
|
}
|
||||||
|
StoragePoolType::MergerfsPool { data_disks, parity_disks } => {
|
||||||
|
// Mergerfs pool - collect all member drives
|
||||||
|
let mut all_disks = data_disks.clone();
|
||||||
|
all_disks.extend(parity_disks.clone());
|
||||||
|
self.get_drive_info_for_devices(&all_disks)
|
||||||
|
}
|
||||||
|
StoragePoolType::RaidArray { member_disks, spare_disks, .. } => {
|
||||||
|
// RAID array - collect member and spare drives
|
||||||
|
let mut all_disks = member_disks.clone();
|
||||||
|
all_disks.extend(spare_disks.clone());
|
||||||
|
self.get_drive_info_for_devices(&all_disks)
|
||||||
|
}
|
||||||
|
StoragePoolType::ZfsPool { .. } => {
|
||||||
|
// ZFS pool - use detected devices (future implementation)
|
||||||
|
let device_names = self.detected_devices.get(mount_point).cloned().unwrap_or_default();
|
||||||
|
self.get_drive_info_for_devices(&device_names)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate pool health based on drive status and pool type.
///
/// Health is derived solely from the SMART `health_status` strings of the
/// supplied drives; pool types with redundancy (mergerfs with parity,
/// RAID 1/5/6) tolerate a single failure as Degraded.
fn calculate_pool_health(&self, pool_type: &StoragePoolType, drives: &[DriveInfo]) -> PoolHealth {
    match pool_type {
        StoragePoolType::Single => {
            // Single disk - health is just the drive health
            if drives.is_empty() {
                PoolHealth::Unknown
            } else if drives.iter().all(|d| d.health_status == "PASSED") {
                PoolHealth::Healthy
            } else {
                PoolHealth::Critical
            }
        }
        StoragePoolType::PhysicalDrive { .. } => {
            // Physical drive - health is just the drive health (similar to Single)
            if drives.is_empty() {
                PoolHealth::Unknown
            } else if drives.iter().all(|d| d.health_status == "PASSED") {
                PoolHealth::Healthy
            } else {
                PoolHealth::Critical
            }
        }
        StoragePoolType::MergerfsPool { data_disks, parity_disks } => {
            // Count failed drives separately on the data and parity sides.
            let failed_data = drives.iter()
                .filter(|d| data_disks.contains(&d.device) && d.health_status != "PASSED")
                .count();
            let failed_parity = drives.iter()
                .filter(|d| parity_disks.contains(&d.device) && d.health_status != "PASSED")
                .count();

            match (failed_data, failed_parity) {
                (0, 0) => PoolHealth::Healthy,
                (1, 0) => PoolHealth::Degraded, // Can recover with parity
                (0, 1) => PoolHealth::Degraded, // Lost parity protection
                _ => PoolHealth::Critical, // Multiple failures
            }
        }
        StoragePoolType::RaidArray { level, .. } => {
            let failed_drives = drives.iter().filter(|d| d.health_status != "PASSED").count();

            // Basic RAID health logic (can be enhanced per RAID level)
            match failed_drives {
                0 => PoolHealth::Healthy,
                1 if level.contains('1') || level.contains('5') || level.contains('6') => PoolHealth::Degraded,
                _ => PoolHealth::Critical,
            }
        }
        StoragePoolType::ZfsPool { .. } => {
            // ZFS health would require zpool status parsing (future)
            if drives.iter().all(|d| d.health_status == "PASSED") {
                PoolHealth::Healthy
            } else {
                PoolHealth::Degraded
            }
        }
    }
}
|
||||||
|
|
||||||
|
/// Get drive information for a list of device names
|
||||||
|
fn get_drive_info_for_devices(&self, device_names: &[String]) -> Result<Vec<DriveInfo>> {
|
||||||
|
let mut drives = Vec::new();
|
||||||
|
|
||||||
|
for device_name in device_names {
|
||||||
|
let device_path = format!("/dev/{}", device_name);
|
||||||
|
|
||||||
|
// Get SMART data for this drive
|
||||||
|
let (health_status, temperature, wear_level) = self.get_smart_data(&device_path);
|
||||||
|
|
||||||
|
drives.push(DriveInfo {
|
||||||
|
device: device_name.clone(),
|
||||||
|
health_status: health_status.clone(),
|
||||||
|
temperature,
|
||||||
|
wear_level,
|
||||||
|
});
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"Drive info for {}: health={}, temp={:?}°C, wear={:?}%",
|
||||||
|
device_name, health_status, temperature, wear_level
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(drives)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get SMART data for a drive (health, temperature, wear level).
///
/// Shells out to `sudo smartctl -a <device>` and parses the text output.
/// Returns ("UNKNOWN", None, None) when the command fails, so a missing
/// smartctl binary or denied sudo degrades gracefully instead of erroring.
///
/// NOTE(review): smartctl's exit status is a bitmask and is often non-zero
/// even when valid output was produced; `status.success()` may discard
/// usable data — confirm against smartctl(8) before tightening.
fn get_smart_data(&self, device_path: &str) -> (String, Option<f32>, Option<f32>) {
    // Try to get SMART data using smartctl (sudo needed for raw device access)
    let output = Command::new("sudo")
        .arg("smartctl")
        .arg("-a")
        .arg(device_path)
        .output();

    match output {
        Ok(result) if result.status.success() => {
            let stdout = String::from_utf8_lossy(&result.stdout);

            // Parse health status via substring scan of the whole report
            let health = if stdout.contains("PASSED") {
                "PASSED".to_string()
            } else if stdout.contains("FAILED") {
                "FAILED".to_string()
            } else {
                "UNKNOWN".to_string()
            };

            // Parse temperature (look for various temperature indicators)
            let temperature = self.parse_temperature_from_smart(&stdout);

            // Parse wear level (for SSDs)
            let wear_level = self.parse_wear_level_from_smart(&stdout);

            (health, temperature, wear_level)
        }
        _ => {
            // Spawn failure or non-zero exit: report unknown rather than error.
            debug!("Failed to get SMART data for {}", device_path);
            ("UNKNOWN".to_string(), None, None)
        }
    }
}
|
||||||
|
|
||||||
|
/// Parse temperature from SMART output
|
||||||
|
fn parse_temperature_from_smart(&self, smart_output: &str) -> Option<f32> {
|
||||||
|
for line in smart_output.lines() {
|
||||||
|
// Look for temperature in various formats
|
||||||
|
if line.contains("Temperature_Celsius") || line.contains("Temperature") {
|
||||||
|
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if parts.len() >= 10 {
|
||||||
|
if let Ok(temp) = parts[9].parse::<f32>() {
|
||||||
|
return Some(temp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// NVMe drives might show temperature differently
|
||||||
|
if line.contains("temperature:") {
|
||||||
|
if let Some(temp_part) = line.split("temperature:").nth(1) {
|
||||||
|
if let Some(temp_str) = temp_part.split_whitespace().next() {
|
||||||
|
if let Ok(temp) = temp_str.parse::<f32>() {
|
||||||
|
return Some(temp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse wear level from SMART output (SSD wear leveling)
|
||||||
|
/// Supports both NVMe and SATA SSD wear indicators
|
||||||
|
fn parse_wear_level_from_smart(&self, smart_output: &str) -> Option<f32> {
|
||||||
|
for line in smart_output.lines() {
|
||||||
|
let line = line.trim();
|
||||||
|
|
||||||
|
// NVMe drives - direct percentage used
|
||||||
|
if line.contains("Percentage Used:") {
|
||||||
|
if let Some(wear_part) = line.split("Percentage Used:").nth(1) {
|
||||||
|
if let Some(wear_str) = wear_part.split('%').next() {
|
||||||
|
if let Ok(wear) = wear_str.trim().parse::<f32>() {
|
||||||
|
return Some(wear);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SATA SSD attributes - parse SMART table format
|
||||||
|
// Format: ID ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
|
||||||
|
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if parts.len() >= 10 {
|
||||||
|
// SSD Life Left / Percent Lifetime Remaining (higher = less wear)
|
||||||
|
if line.contains("SSD_Life_Left") || line.contains("Percent_Lifetime_Remain") {
|
||||||
|
if let Ok(remaining) = parts[3].parse::<f32>() { // VALUE column
|
||||||
|
return Some(100.0 - remaining); // Convert remaining to used
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Media Wearout Indicator (lower = more wear, normalize to 0-100)
|
||||||
|
if line.contains("Media_Wearout_Indicator") {
|
||||||
|
if let Ok(remaining) = parts[3].parse::<f32>() { // VALUE column
|
||||||
|
return Some(100.0 - remaining); // Convert remaining to used
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wear Leveling Count (higher = less wear, but varies by manufacturer)
|
||||||
|
if line.contains("Wear_Leveling_Count") {
|
||||||
|
if let Ok(wear_count) = parts[3].parse::<f32>() { // VALUE column
|
||||||
|
// Most SSDs: 100 = new, decreases with wear
|
||||||
|
if wear_count <= 100.0 {
|
||||||
|
return Some(100.0 - wear_count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Total LBAs Written - calculate against typical endurance if available
|
||||||
|
// This is more complex and manufacturer-specific, so we skip for now
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert bytes to human-readable format
|
||||||
|
fn bytes_to_human_readable(&self, bytes: u64) -> String {
|
||||||
|
const UNITS: &[&str] = &["B", "K", "M", "G", "T"];
|
||||||
|
let mut size = bytes as f64;
|
||||||
|
let mut unit_index = 0;
|
||||||
|
|
||||||
|
while size >= 1024.0 && unit_index < UNITS.len() - 1 {
|
||||||
|
size /= 1024.0;
|
||||||
|
unit_index += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if unit_index == 0 {
|
||||||
|
format!("{:.0}{}", size, UNITS[unit_index])
|
||||||
|
} else {
|
||||||
|
format!("{:.1}{}", size, UNITS[unit_index])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert bytes to gigabytes
|
||||||
|
fn bytes_to_gb(&self, bytes: u64) -> f32 {
|
||||||
|
bytes as f32 / (1024.0 * 1024.0 * 1024.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Detect device backing a mount point using lsblk (static version for startup)
|
||||||
|
fn detect_device_for_mount_point_static(mount_point: &str) -> Result<Vec<String>> {
|
||||||
|
let output = Command::new("lsblk")
|
||||||
|
.args(&["-n", "-o", "NAME,MOUNTPOINT"])
|
||||||
|
.output()?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
return Ok(Vec::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||||
|
|
||||||
|
for line in output_str.lines() {
|
||||||
|
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if parts.len() >= 2 && parts[1] == mount_point {
|
||||||
|
// Remove tree symbols and extract device name (e.g., "├─nvme0n1p2" -> "nvme0n1p2")
|
||||||
|
let device_name = parts[0]
|
||||||
|
.trim_start_matches('├')
|
||||||
|
.trim_start_matches('└')
|
||||||
|
.trim_start_matches('─')
|
||||||
|
.trim();
|
||||||
|
|
||||||
|
// Extract base device name (e.g., "nvme0n1p2" -> "nvme0n1")
|
||||||
|
if let Some(base_device) = Self::extract_base_device(device_name) {
|
||||||
|
return Ok(vec![base_device]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Vec::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract base device name from partition (e.g., "nvme0n1p2" -> "nvme0n1", "sda1" -> "sda")
|
||||||
|
/// Extract base device name from partition (e.g., "nvme0n1p2" -> "nvme0n1", "sda1" -> "sda").
///
/// Devices that use a "pN" partition suffix (NVMe namespaces, MMC/SD
/// cards) are cut at the 'p'. When no suffix is present the name is
/// returned unchanged — previously an unpartitioned "nvme0n1" fell
/// through to the digit-stripping fallback and was wrongly truncated
/// to "nvme0n". Traditional devices have trailing partition digits
/// stripped ("sda1" -> "sda"); names with no trailing digits or that
/// are all digits are returned as-is.
fn extract_base_device(device_name: &str) -> Option<String> {
    // NVMe / MMC devices: partition suffix is "p<N>" (nvme0n1p1 -> nvme0n1).
    if device_name.starts_with("nvme") || device_name.starts_with("mmcblk") {
        return Some(match device_name.find('p') {
            Some(p_pos) => device_name[..p_pos].to_string(),
            // No partition suffix: the name is already the base device.
            None => device_name.to_string(),
        });
    }

    // Traditional devices: strip trailing partition digits (sda1 -> sda).
    let base = device_name.trim_end_matches(|c: char| c.is_ascii_digit());
    if base.is_empty() || base.len() == device_name.len() {
        // Entirely numeric or no partition digits: return as-is.
        Some(device_name.to_string())
    } else {
        Some(base.to_string())
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Get filesystem info using df command
|
||||||
|
fn get_filesystem_info(&self, path: &str) -> Result<(u64, u64)> {
|
||||||
|
let output = Command::new("df")
|
||||||
|
.arg("--block-size=1")
|
||||||
|
.arg(path)
|
||||||
|
.output()?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
return Err(anyhow::anyhow!("df command failed for {}", path));
|
||||||
|
}
|
||||||
|
|
||||||
|
let output_str = String::from_utf8(output.stdout)?;
|
||||||
|
let lines: Vec<&str> = output_str.lines().collect();
|
||||||
|
|
||||||
|
if lines.len() < 2 {
|
||||||
|
return Err(anyhow::anyhow!("Unexpected df output format"));
|
||||||
|
}
|
||||||
|
|
||||||
|
let fields: Vec<&str> = lines[1].split_whitespace().collect();
|
||||||
|
if fields.len() < 4 {
|
||||||
|
return Err(anyhow::anyhow!("Unexpected df fields count"));
|
||||||
|
}
|
||||||
|
|
||||||
|
let total_bytes = fields[1].parse::<u64>()?;
|
||||||
|
let used_bytes = fields[2].parse::<u64>()?;
|
||||||
|
|
||||||
|
Ok((total_bytes, used_bytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Parse size string (e.g., "120G", "45M") to GB value
|
||||||
|
fn parse_size_to_gb(&self, size_str: &str) -> f32 {
|
||||||
|
let size_str = size_str.trim();
|
||||||
|
if size_str.is_empty() || size_str == "-" {
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract numeric part and unit
|
||||||
|
let (num_str, unit) = if let Some(last_char) = size_str.chars().last() {
|
||||||
|
if last_char.is_alphabetic() {
|
||||||
|
let num_part = &size_str[..size_str.len() - 1];
|
||||||
|
let unit_part = &size_str[size_str.len() - 1..];
|
||||||
|
(num_part, unit_part)
|
||||||
|
} else {
|
||||||
|
(size_str, "")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
(size_str, "")
|
||||||
|
};
|
||||||
|
|
||||||
|
let number: f32 = num_str.parse().unwrap_or(0.0);
|
||||||
|
|
||||||
|
match unit.to_uppercase().as_str() {
|
||||||
|
"T" | "TB" => number * 1024.0,
|
||||||
|
"G" | "GB" => number,
|
||||||
|
"M" | "MB" => number / 1024.0,
|
||||||
|
"K" | "KB" => number / (1024.0 * 1024.0),
|
||||||
|
"B" | "" => number / (1024.0 * 1024.0 * 1024.0),
|
||||||
|
_ => number, // Assume GB if unknown unit
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Collector for DiskCollector {
|
||||||
|
|
||||||
|
async fn collect(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
|
||||||
|
let start_time = Instant::now();
|
||||||
|
debug!("Collecting storage pool and individual drive metrics");
|
||||||
|
|
||||||
|
let mut metrics = Vec::new();
|
||||||
|
|
||||||
|
// Get configured storage pools with individual drive data
|
||||||
|
let storage_pools = match self.get_configured_storage_pools() {
|
||||||
|
Ok(pools) => {
|
||||||
|
debug!("Found {} storage pools", pools.len());
|
||||||
|
pools
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
debug!("Failed to get storage pools: {}", e);
|
||||||
|
Vec::new()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Generate metrics for each storage pool and its underlying drives
|
||||||
|
for storage_pool in &storage_pools {
|
||||||
|
let timestamp = chrono::Utc::now().timestamp() as u64;
|
||||||
|
|
||||||
|
// Storage pool overall metrics
|
||||||
|
let pool_name = &storage_pool.name;
|
||||||
|
|
||||||
|
// Parse size strings to get actual values for calculations
|
||||||
|
let size_gb = self.parse_size_to_gb(&storage_pool.size);
|
||||||
|
let used_gb = self.parse_size_to_gb(&storage_pool.used);
|
||||||
|
let avail_gb = self.parse_size_to_gb(&storage_pool.available);
|
||||||
|
|
||||||
|
// Calculate status based on configured thresholds and pool health
|
||||||
|
let usage_status = if storage_pool.usage_percent >= self.config.usage_critical_percent {
|
||||||
|
Status::Critical
|
||||||
|
} else if storage_pool.usage_percent >= self.config.usage_warning_percent {
|
||||||
|
Status::Warning
|
||||||
|
} else {
|
||||||
|
Status::Ok
|
||||||
|
};
|
||||||
|
|
||||||
|
let pool_status = match storage_pool.pool_health {
|
||||||
|
PoolHealth::Critical => Status::Critical,
|
||||||
|
PoolHealth::Degraded => Status::Warning,
|
||||||
|
PoolHealth::Rebuilding => Status::Warning,
|
||||||
|
PoolHealth::Healthy => usage_status,
|
||||||
|
PoolHealth::Unknown => Status::Unknown,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Storage pool info metrics
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_mount_point", pool_name),
|
||||||
|
value: MetricValue::String(storage_pool.mount_point.clone()),
|
||||||
|
unit: None,
|
||||||
|
description: Some(format!("Mount: {}", storage_pool.mount_point)),
|
||||||
|
status: Status::Ok,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_filesystem", pool_name),
|
||||||
|
value: MetricValue::String(storage_pool.filesystem.clone()),
|
||||||
|
unit: None,
|
||||||
|
description: Some(format!("FS: {}", storage_pool.filesystem)),
|
||||||
|
status: Status::Ok,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Enhanced pool type information
|
||||||
|
let pool_type_str = match &storage_pool.pool_type {
|
||||||
|
StoragePoolType::Single => "single".to_string(),
|
||||||
|
StoragePoolType::PhysicalDrive { filesystems } => {
|
||||||
|
format!("drive ({})", filesystems.len())
|
||||||
|
}
|
||||||
|
StoragePoolType::MergerfsPool { data_disks, parity_disks } => {
|
||||||
|
format!("mergerfs ({}+{})", data_disks.len(), parity_disks.len())
|
||||||
|
}
|
||||||
|
StoragePoolType::RaidArray { level, member_disks, spare_disks } => {
|
||||||
|
format!("{} ({}+{})", level, member_disks.len(), spare_disks.len())
|
||||||
|
}
|
||||||
|
StoragePoolType::ZfsPool { pool_name, .. } => {
|
||||||
|
format!("zfs ({})", pool_name)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_pool_type", pool_name),
|
||||||
|
value: MetricValue::String(pool_type_str.clone()),
|
||||||
|
unit: None,
|
||||||
|
description: Some(format!("Type: {}", pool_type_str)),
|
||||||
|
status: Status::Ok,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Pool health status
|
||||||
|
let health_str = match storage_pool.pool_health {
|
||||||
|
PoolHealth::Healthy => "healthy",
|
||||||
|
PoolHealth::Degraded => "degraded",
|
||||||
|
PoolHealth::Critical => "critical",
|
||||||
|
PoolHealth::Rebuilding => "rebuilding",
|
||||||
|
PoolHealth::Unknown => "unknown",
|
||||||
|
};
|
||||||
|
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_pool_health", pool_name),
|
||||||
|
value: MetricValue::String(health_str.to_string()),
|
||||||
|
unit: None,
|
||||||
|
description: Some(format!("Health: {}", health_str)),
|
||||||
|
status: pool_status,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Storage pool size metrics
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_total_gb", pool_name),
|
||||||
|
value: MetricValue::Float(size_gb),
|
||||||
|
unit: Some("GB".to_string()),
|
||||||
|
description: Some(format!("Total: {}", storage_pool.size)),
|
||||||
|
status: Status::Ok,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_used_gb", pool_name),
|
||||||
|
value: MetricValue::Float(used_gb),
|
||||||
|
unit: Some("GB".to_string()),
|
||||||
|
description: Some(format!("Used: {}", storage_pool.used)),
|
||||||
|
status: pool_status,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_available_gb", pool_name),
|
||||||
|
value: MetricValue::Float(avail_gb),
|
||||||
|
unit: Some("GB".to_string()),
|
||||||
|
description: Some(format!("Available: {}", storage_pool.available)),
|
||||||
|
status: Status::Ok,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_usage_percent", pool_name),
|
||||||
|
value: MetricValue::Float(storage_pool.usage_percent),
|
||||||
|
unit: Some("%".to_string()),
|
||||||
|
description: Some(format!("Usage: {:.1}%", storage_pool.usage_percent)),
|
||||||
|
status: pool_status,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Individual drive metrics for this storage pool
|
||||||
|
for drive in &storage_pool.underlying_drives {
|
||||||
|
// Drive health status
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_{}_health", pool_name, drive.device),
|
||||||
|
value: MetricValue::String(drive.health_status.clone()),
|
||||||
|
unit: None,
|
||||||
|
description: Some(format!("{}: {}", drive.device, drive.health_status)),
|
||||||
|
status: if drive.health_status == "PASSED" { Status::Ok }
|
||||||
|
else if drive.health_status == "FAILED" { Status::Critical }
|
||||||
|
else { Status::Unknown },
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Drive temperature
|
||||||
|
if let Some(temp) = drive.temperature {
|
||||||
|
let temp_status = self.calculate_temperature_status(
|
||||||
|
&format!("disk_{}_{}_temperature", pool_name, drive.device),
|
||||||
|
temp,
|
||||||
|
status_tracker
|
||||||
|
);
|
||||||
|
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_{}_temperature", pool_name, drive.device),
|
||||||
|
value: MetricValue::Float(temp),
|
||||||
|
unit: Some("°C".to_string()),
|
||||||
|
description: Some(format!("{}: {:.0}°C", drive.device, temp)),
|
||||||
|
status: temp_status,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drive wear level (for SSDs)
|
||||||
|
if let Some(wear) = drive.wear_level {
|
||||||
|
let wear_status = if wear >= self.config.wear_critical_percent { Status::Critical }
|
||||||
|
else if wear >= self.config.wear_warning_percent { Status::Warning }
|
||||||
|
else { Status::Ok };
|
||||||
|
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_{}_wear_percent", pool_name, drive.device),
|
||||||
|
value: MetricValue::Float(wear),
|
||||||
|
unit: Some("%".to_string()),
|
||||||
|
description: Some(format!("{}: {:.0}% wear", drive.device, wear)),
|
||||||
|
status: wear_status,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Individual filesystem metrics for PhysicalDrive pools
|
||||||
|
if let StoragePoolType::PhysicalDrive { filesystems } = &storage_pool.pool_type {
|
||||||
|
for filesystem_mount in filesystems {
|
||||||
|
if let Ok((total_bytes, used_bytes)) = self.get_filesystem_info(filesystem_mount) {
|
||||||
|
let available_bytes = total_bytes - used_bytes;
|
||||||
|
let usage_percent = if total_bytes > 0 {
|
||||||
|
(used_bytes as f64 / total_bytes as f64) * 100.0
|
||||||
|
} else { 0.0 };
|
||||||
|
|
||||||
|
let filesystem_name = if filesystem_mount == "/" {
|
||||||
|
"root".to_string()
|
||||||
|
} else {
|
||||||
|
filesystem_mount.trim_start_matches('/').replace('/', "_")
|
||||||
|
};
|
||||||
|
|
||||||
|
// Calculate filesystem status based on usage
|
||||||
|
let fs_status = if usage_percent >= self.config.usage_critical_percent as f64 {
|
||||||
|
Status::Critical
|
||||||
|
} else if usage_percent >= self.config.usage_warning_percent as f64 {
|
||||||
|
Status::Warning
|
||||||
|
} else {
|
||||||
|
Status::Ok
|
||||||
|
};
|
||||||
|
|
||||||
|
// Filesystem usage metrics
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_fs_{}_usage_percent", pool_name, filesystem_name),
|
||||||
|
value: MetricValue::Float(usage_percent as f32),
|
||||||
|
unit: Some("%".to_string()),
|
||||||
|
description: Some(format!("{}: {:.0}%", filesystem_mount, usage_percent)),
|
||||||
|
status: fs_status.clone(),
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_fs_{}_used_gb", pool_name, filesystem_name),
|
||||||
|
value: MetricValue::Float(self.bytes_to_gb(used_bytes)),
|
||||||
|
unit: Some("GB".to_string()),
|
||||||
|
description: Some(format!("{}: {}GB used", filesystem_mount, self.bytes_to_human_readable(used_bytes))),
|
||||||
|
status: Status::Ok,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_fs_{}_total_gb", pool_name, filesystem_name),
|
||||||
|
value: MetricValue::Float(self.bytes_to_gb(total_bytes)),
|
||||||
|
unit: Some("GB".to_string()),
|
||||||
|
description: Some(format!("{}: {}GB total", filesystem_mount, self.bytes_to_human_readable(total_bytes))),
|
||||||
|
status: Status::Ok,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_fs_{}_available_gb", pool_name, filesystem_name),
|
||||||
|
value: MetricValue::Float(self.bytes_to_gb(available_bytes)),
|
||||||
|
unit: Some("GB".to_string()),
|
||||||
|
description: Some(format!("{}: {}GB available", filesystem_mount, self.bytes_to_human_readable(available_bytes))),
|
||||||
|
status: Status::Ok,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: format!("disk_{}_fs_{}_mount_point", pool_name, filesystem_name),
|
||||||
|
value: MetricValue::String(filesystem_mount.clone()),
|
||||||
|
unit: None,
|
||||||
|
description: Some(format!("Mount: {}", filesystem_mount)),
|
||||||
|
status: Status::Ok,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add storage pool count metric
|
||||||
|
metrics.push(Metric {
|
||||||
|
name: "disk_count".to_string(),
|
||||||
|
value: MetricValue::Integer(storage_pools.len() as i64),
|
||||||
|
unit: None,
|
||||||
|
description: Some(format!("Total storage pools: {}", storage_pools.len())),
|
||||||
|
status: Status::Ok,
|
||||||
|
timestamp: chrono::Utc::now().timestamp() as u64,
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
let collection_time = start_time.elapsed();
|
||||||
|
debug!(
|
||||||
|
"Multi-disk collection completed in {:?} with {} metrics",
|
||||||
|
collection_time,
|
||||||
|
metrics.len()
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(metrics)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,5 +1,5 @@
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use cm_dashboard_shared::{registry, Metric, MetricValue, Status, StatusTracker, HysteresisThresholds};
|
use cm_dashboard_shared::{AgentData, TmpfsData, HysteresisThresholds, Status};
|
||||||
|
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
@ -10,34 +10,19 @@ use crate::config::MemoryConfig;
|
|||||||
///
|
///
|
||||||
/// EFFICIENCY OPTIMIZATIONS:
|
/// EFFICIENCY OPTIMIZATIONS:
|
||||||
/// - Single /proc/meminfo read for all memory metrics
|
/// - Single /proc/meminfo read for all memory metrics
|
||||||
/// - Minimal string parsing with split operations
|
/// - Minimal string allocations
|
||||||
/// - Pre-calculated KB to GB conversion
|
/// - No process spawning for basic metrics
|
||||||
/// - No regex or complex parsing
|
/// - <0.5ms collection time target
|
||||||
/// - <0.1ms collection time target
|
|
||||||
pub struct MemoryCollector {
|
pub struct MemoryCollector {
|
||||||
usage_thresholds: HysteresisThresholds,
|
usage_thresholds: HysteresisThresholds,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Memory information parsed from /proc/meminfo
|
|
||||||
#[derive(Debug, Default)]
|
|
||||||
struct MemoryInfo {
|
|
||||||
total_kb: u64,
|
|
||||||
available_kb: u64,
|
|
||||||
free_kb: u64,
|
|
||||||
buffers_kb: u64,
|
|
||||||
cached_kb: u64,
|
|
||||||
swap_total_kb: u64,
|
|
||||||
swap_free_kb: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl MemoryCollector {
|
impl MemoryCollector {
|
||||||
pub fn new(config: MemoryConfig) -> Self {
|
pub fn new(config: MemoryConfig) -> Self {
|
||||||
// Create hysteresis thresholds with 5% gap for memory usage
|
// Create hysteresis thresholds with 10% gap for recovery
|
||||||
let usage_thresholds = HysteresisThresholds::with_custom_gaps(
|
let usage_thresholds = HysteresisThresholds::new(
|
||||||
config.usage_warning_percent,
|
config.usage_warning_percent,
|
||||||
5.0, // 5% gap for warning recovery
|
|
||||||
config.usage_critical_percent,
|
config.usage_critical_percent,
|
||||||
5.0, // 5% gap for critical recovery
|
|
||||||
);
|
);
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
@ -45,11 +30,6 @@ impl MemoryCollector {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate memory usage status using hysteresis thresholds
|
|
||||||
fn calculate_usage_status(&self, metric_name: &str, usage_percent: f32, status_tracker: &mut StatusTracker) -> Status {
|
|
||||||
status_tracker.calculate_with_hysteresis(metric_name, usage_percent, &self.usage_thresholds)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parse /proc/meminfo efficiently
|
/// Parse /proc/meminfo efficiently
|
||||||
/// Format: "MemTotal: 16384000 kB"
|
/// Format: "MemTotal: 16384000 kB"
|
||||||
async fn parse_meminfo(&self) -> Result<MemoryInfo, CollectorError> {
|
async fn parse_meminfo(&self) -> Result<MemoryInfo, CollectorError> {
|
||||||
@ -96,209 +76,144 @@ impl MemoryCollector {
|
|||||||
Ok(info)
|
Ok(info)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Convert KB to GB efficiently (avoiding floating point in hot path)
|
/// Populate memory data directly into AgentData
|
||||||
fn kb_to_gb(kb: u64) -> f32 {
|
async fn populate_memory_data(&self, info: &MemoryInfo, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
kb as f32 / 1_048_576.0 // 1024 * 1024
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Calculate memory metrics from parsed info
|
|
||||||
fn calculate_metrics(&self, info: &MemoryInfo, status_tracker: &mut StatusTracker) -> Vec<Metric> {
|
|
||||||
let mut metrics = Vec::with_capacity(6);
|
|
||||||
|
|
||||||
// Calculate derived values
|
// Calculate derived values
|
||||||
let used_kb = info.total_kb - info.available_kb;
|
let available = info.available_kb;
|
||||||
let usage_percent = (used_kb as f32 / info.total_kb as f32) * 100.0;
|
let used = info.total_kb - available;
|
||||||
let usage_status = self.calculate_usage_status(registry::MEMORY_USAGE_PERCENT, usage_percent, status_tracker);
|
let usage_percent = (used as f32 / info.total_kb as f32) * 100.0;
|
||||||
|
|
||||||
let swap_used_kb = info.swap_total_kb - info.swap_free_kb;
|
// Populate basic memory fields
|
||||||
|
agent_data.system.memory.usage_percent = usage_percent;
|
||||||
|
agent_data.system.memory.total_gb = info.total_kb as f32 / (1024.0 * 1024.0);
|
||||||
|
agent_data.system.memory.used_gb = used as f32 / (1024.0 * 1024.0);
|
||||||
|
|
||||||
// Convert to GB for metrics
|
// Populate swap data if available
|
||||||
let total_gb = Self::kb_to_gb(info.total_kb);
|
agent_data.system.memory.swap_total_gb = info.swap_total_kb as f32 / (1024.0 * 1024.0);
|
||||||
let used_gb = Self::kb_to_gb(used_kb);
|
agent_data.system.memory.swap_used_gb = (info.swap_total_kb - info.swap_free_kb) as f32 / (1024.0 * 1024.0);
|
||||||
let available_gb = Self::kb_to_gb(info.available_kb);
|
|
||||||
let swap_total_gb = Self::kb_to_gb(info.swap_total_kb);
|
|
||||||
let swap_used_gb = Self::kb_to_gb(swap_used_kb);
|
|
||||||
|
|
||||||
// Memory usage percentage (primary metric with status)
|
Ok(())
|
||||||
metrics.push(
|
|
||||||
Metric::new(
|
|
||||||
registry::MEMORY_USAGE_PERCENT.to_string(),
|
|
||||||
MetricValue::Float(usage_percent),
|
|
||||||
usage_status,
|
|
||||||
)
|
|
||||||
.with_description("Memory usage percentage".to_string())
|
|
||||||
.with_unit("%".to_string()),
|
|
||||||
);
|
|
||||||
|
|
||||||
// Total memory
|
|
||||||
metrics.push(
|
|
||||||
Metric::new(
|
|
||||||
registry::MEMORY_TOTAL_GB.to_string(),
|
|
||||||
MetricValue::Float(total_gb),
|
|
||||||
Status::Ok, // Total memory doesn't have status
|
|
||||||
)
|
|
||||||
.with_description("Total system memory".to_string())
|
|
||||||
.with_unit("GB".to_string()),
|
|
||||||
);
|
|
||||||
|
|
||||||
// Used memory
|
|
||||||
metrics.push(
|
|
||||||
Metric::new(
|
|
||||||
registry::MEMORY_USED_GB.to_string(),
|
|
||||||
MetricValue::Float(used_gb),
|
|
||||||
Status::Ok, // Used memory absolute value doesn't have status
|
|
||||||
)
|
|
||||||
.with_description("Used system memory".to_string())
|
|
||||||
.with_unit("GB".to_string()),
|
|
||||||
);
|
|
||||||
|
|
||||||
// Available memory
|
|
||||||
metrics.push(
|
|
||||||
Metric::new(
|
|
||||||
registry::MEMORY_AVAILABLE_GB.to_string(),
|
|
||||||
MetricValue::Float(available_gb),
|
|
||||||
Status::Ok, // Available memory absolute value doesn't have status
|
|
||||||
)
|
|
||||||
.with_description("Available system memory".to_string())
|
|
||||||
.with_unit("GB".to_string()),
|
|
||||||
);
|
|
||||||
|
|
||||||
// Swap metrics (only if swap exists)
|
|
||||||
if info.swap_total_kb > 0 {
|
|
||||||
metrics.push(
|
|
||||||
Metric::new(
|
|
||||||
registry::MEMORY_SWAP_TOTAL_GB.to_string(),
|
|
||||||
MetricValue::Float(swap_total_gb),
|
|
||||||
Status::Ok,
|
|
||||||
)
|
|
||||||
.with_description("Total swap space".to_string())
|
|
||||||
.with_unit("GB".to_string()),
|
|
||||||
);
|
|
||||||
|
|
||||||
metrics.push(
|
|
||||||
Metric::new(
|
|
||||||
registry::MEMORY_SWAP_USED_GB.to_string(),
|
|
||||||
MetricValue::Float(swap_used_gb),
|
|
||||||
Status::Ok,
|
|
||||||
)
|
|
||||||
.with_description("Used swap space".to_string())
|
|
||||||
.with_unit("GB".to_string()),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Monitor tmpfs (/tmp) usage
|
|
||||||
if let Ok(tmpfs_metrics) = self.get_tmpfs_metrics() {
|
|
||||||
metrics.extend(tmpfs_metrics);
|
|
||||||
}
|
|
||||||
|
|
||||||
metrics
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get tmpfs (/tmp) usage metrics
|
/// Populate tmpfs data into AgentData
|
||||||
fn get_tmpfs_metrics(&self) -> Result<Vec<Metric>, CollectorError> {
|
async fn populate_tmpfs_data(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
use std::process::Command;
|
// Discover all tmpfs mount points
|
||||||
|
let tmpfs_mounts = self.discover_tmpfs_mounts()?;
|
||||||
|
|
||||||
let output = Command::new("df")
|
if tmpfs_mounts.is_empty() {
|
||||||
.arg("--block-size=1")
|
debug!("No tmpfs mounts found to monitor");
|
||||||
.arg("/tmp")
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get usage data for all tmpfs mounts at once using df (with 2 second timeout)
|
||||||
|
let mut df_args = vec!["2", "df", "--output=target,size,used", "--block-size=1"];
|
||||||
|
df_args.extend(tmpfs_mounts.iter().map(|s| s.as_str()));
|
||||||
|
|
||||||
|
let df_output = std::process::Command::new("timeout")
|
||||||
|
.args(&df_args[..])
|
||||||
.output()
|
.output()
|
||||||
.map_err(|e| CollectorError::SystemRead {
|
.map_err(|e| CollectorError::SystemRead {
|
||||||
path: "/tmp".to_string(),
|
path: "tmpfs mounts".to_string(),
|
||||||
error: e.to_string(),
|
error: e.to_string(),
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
if !output.status.success() {
|
let df_str = String::from_utf8_lossy(&df_output.stdout);
|
||||||
return Ok(Vec::new()); // Return empty if /tmp not available
|
let df_lines: Vec<&str> = df_str.lines().skip(1).collect(); // Skip header
|
||||||
|
|
||||||
|
// Process each tmpfs mount
|
||||||
|
for (i, mount_point) in tmpfs_mounts.iter().enumerate() {
|
||||||
|
if i >= df_lines.len() {
|
||||||
|
debug!("Not enough df output lines for tmpfs mount: {}", mount_point);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let parts: Vec<&str> = df_lines[i].split_whitespace().collect();
|
||||||
|
if parts.len() < 3 {
|
||||||
|
debug!("Invalid df output for tmpfs mount: {}", mount_point);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let total_bytes: u64 = parts[1].parse().unwrap_or(0);
|
||||||
|
let used_bytes: u64 = parts[2].parse().unwrap_or(0);
|
||||||
|
|
||||||
|
if total_bytes == 0 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let total_gb = total_bytes as f32 / (1024.0 * 1024.0 * 1024.0);
|
||||||
|
let used_gb = used_bytes as f32 / (1024.0 * 1024.0 * 1024.0);
|
||||||
|
let usage_percent = (used_bytes as f32 / total_bytes as f32) * 100.0;
|
||||||
|
|
||||||
|
// Add to tmpfs list
|
||||||
|
agent_data.system.memory.tmpfs.push(TmpfsData {
|
||||||
|
mount: mount_point.clone(),
|
||||||
|
usage_percent,
|
||||||
|
used_gb,
|
||||||
|
total_gb,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let output_str = String::from_utf8(output.stdout)
|
// Sort tmpfs mounts by mount point for consistent display order
|
||||||
.map_err(|e| CollectorError::Parse {
|
agent_data.system.memory.tmpfs.sort_by(|a, b| a.mount.cmp(&b.mount));
|
||||||
value: "df output".to_string(),
|
|
||||||
error: e.to_string(),
|
|
||||||
})?;
|
|
||||||
|
|
||||||
let lines: Vec<&str> = output_str.lines().collect();
|
Ok(())
|
||||||
if lines.len() < 2 {
|
}
|
||||||
return Ok(Vec::new());
|
|
||||||
|
/// Discover all tmpfs mount points from /proc/mounts
|
||||||
|
fn discover_tmpfs_mounts(&self) -> Result<Vec<String>, CollectorError> {
|
||||||
|
let content = utils::read_proc_file("/proc/mounts")?;
|
||||||
|
let mut tmpfs_mounts = Vec::new();
|
||||||
|
|
||||||
|
for line in content.lines() {
|
||||||
|
let fields: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if fields.len() >= 3 && fields[2] == "tmpfs" {
|
||||||
|
let mount_point = fields[1];
|
||||||
|
|
||||||
|
// Filter out system/internal tmpfs mounts that aren't useful for monitoring
|
||||||
|
if self.should_monitor_tmpfs(mount_point) {
|
||||||
|
tmpfs_mounts.push(mount_point.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let fields: Vec<&str> = lines[1].split_whitespace().collect();
|
debug!("Discovered {} tmpfs mounts: {:?}", tmpfs_mounts.len(), tmpfs_mounts);
|
||||||
if fields.len() < 4 {
|
Ok(tmpfs_mounts)
|
||||||
return Ok(Vec::new());
|
}
|
||||||
}
|
|
||||||
|
|
||||||
let total_bytes: u64 = fields[1].parse()
|
/// Determine if a tmpfs mount point should be monitored
|
||||||
.map_err(|e: std::num::ParseIntError| CollectorError::Parse {
|
fn should_monitor_tmpfs(&self, mount_point: &str) -> bool {
|
||||||
value: fields[1].to_string(),
|
// Include commonly useful tmpfs mounts
|
||||||
error: e.to_string(),
|
matches!(mount_point,
|
||||||
})?;
|
"/tmp" | "/var/tmp" | "/dev/shm" | "/run" | "/var/log"
|
||||||
let used_bytes: u64 = fields[2].parse()
|
) || mount_point.starts_with("/run/user/") // User session tmpfs
|
||||||
.map_err(|e: std::num::ParseIntError| CollectorError::Parse {
|
}
|
||||||
value: fields[2].to_string(),
|
|
||||||
error: e.to_string(),
|
|
||||||
})?;
|
|
||||||
|
|
||||||
let total_gb = total_bytes as f32 / (1024.0 * 1024.0 * 1024.0);
|
/// Calculate memory usage status based on thresholds
|
||||||
let used_gb = used_bytes as f32 / (1024.0 * 1024.0 * 1024.0);
|
fn calculate_memory_status(&self, usage_percent: f32) -> Status {
|
||||||
let usage_percent = if total_bytes > 0 {
|
self.usage_thresholds.evaluate(usage_percent)
|
||||||
(used_bytes as f32 / total_bytes as f32) * 100.0
|
|
||||||
} else {
|
|
||||||
0.0
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut metrics = Vec::new();
|
|
||||||
let timestamp = chrono::Utc::now().timestamp() as u64;
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "memory_tmp_usage_percent".to_string(),
|
|
||||||
value: MetricValue::Float(usage_percent),
|
|
||||||
unit: Some("%".to_string()),
|
|
||||||
description: Some("tmpfs /tmp usage percentage".to_string()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "memory_tmp_used_gb".to_string(),
|
|
||||||
value: MetricValue::Float(used_gb),
|
|
||||||
unit: Some("GB".to_string()),
|
|
||||||
description: Some("tmpfs /tmp used space".to_string()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "memory_tmp_total_gb".to_string(),
|
|
||||||
value: MetricValue::Float(total_gb),
|
|
||||||
unit: Some("GB".to_string()),
|
|
||||||
description: Some("tmpfs /tmp total space".to_string()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
|
|
||||||
Ok(metrics)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl Collector for MemoryCollector {
|
impl Collector for MemoryCollector {
|
||||||
|
async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
async fn collect(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
|
|
||||||
debug!("Collecting memory metrics");
|
debug!("Collecting memory metrics");
|
||||||
let start = std::time::Instant::now();
|
let start = std::time::Instant::now();
|
||||||
|
|
||||||
|
// Clear tmpfs list to prevent duplicates when updating cached data
|
||||||
|
agent_data.system.memory.tmpfs.clear();
|
||||||
|
|
||||||
// Parse memory info from /proc/meminfo
|
// Parse memory info from /proc/meminfo
|
||||||
let info = self.parse_meminfo().await?;
|
let info = self.parse_meminfo().await?;
|
||||||
|
|
||||||
// Calculate all metrics from parsed info
|
// Populate memory data directly
|
||||||
let metrics = self.calculate_metrics(&info, status_tracker);
|
self.populate_memory_data(&info, agent_data).await?;
|
||||||
|
|
||||||
|
// Collect tmpfs data
|
||||||
|
self.populate_tmpfs_data(agent_data).await?;
|
||||||
|
|
||||||
let duration = start.elapsed();
|
let duration = start.elapsed();
|
||||||
debug!(
|
debug!("Memory collection completed in {:?}", duration);
|
||||||
"Memory collection completed in {:?} with {} metrics",
|
|
||||||
duration,
|
|
||||||
metrics.len()
|
|
||||||
);
|
|
||||||
|
|
||||||
// Efficiency check: warn if collection takes too long
|
// Efficiency check: warn if collection takes too long
|
||||||
if duration.as_millis() > 1 {
|
if duration.as_millis() > 1 {
|
||||||
@ -308,10 +223,21 @@ impl Collector for MemoryCollector {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store performance metrics
|
// Calculate status using thresholds
|
||||||
// Performance tracking handled by cache system
|
agent_data.system.memory.usage_status = self.calculate_memory_status(agent_data.system.memory.usage_percent);
|
||||||
|
|
||||||
Ok(metrics)
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Internal structure for parsing /proc/meminfo
|
||||||
|
#[derive(Default)]
|
||||||
|
struct MemoryInfo {
|
||||||
|
total_kb: u64,
|
||||||
|
available_kb: u64,
|
||||||
|
free_kb: u64,
|
||||||
|
buffers_kb: u64,
|
||||||
|
cached_kb: u64,
|
||||||
|
swap_total_kb: u64,
|
||||||
|
swap_free_kb: u64,
|
||||||
|
}
|
||||||
@ -1,25 +1,57 @@
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use cm_dashboard_shared::{Metric, StatusTracker};
|
use cm_dashboard_shared::{AgentData};
|
||||||
|
use std::process::Output;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
pub mod backup;
|
pub mod backup;
|
||||||
pub mod cpu;
|
pub mod cpu;
|
||||||
pub mod disk;
|
pub mod disk;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod memory;
|
pub mod memory;
|
||||||
|
pub mod network;
|
||||||
pub mod nixos;
|
pub mod nixos;
|
||||||
pub mod systemd;
|
pub mod systemd;
|
||||||
|
|
||||||
pub use error::CollectorError;
|
pub use error::CollectorError;
|
||||||
|
|
||||||
|
/// Run a command with a timeout to prevent blocking
|
||||||
|
/// Properly kills the process if timeout is exceeded
|
||||||
|
pub async fn run_command_with_timeout(mut cmd: tokio::process::Command, timeout_secs: u64) -> std::io::Result<Output> {
|
||||||
|
use tokio::time::timeout;
|
||||||
|
use std::process::Stdio;
|
||||||
|
let timeout_duration = Duration::from_secs(timeout_secs);
|
||||||
|
|
||||||
/// Base trait for all collectors with extreme efficiency requirements
|
// Configure stdio to capture output
|
||||||
|
cmd.stdout(Stdio::piped());
|
||||||
|
cmd.stderr(Stdio::piped());
|
||||||
|
|
||||||
|
let child = cmd.spawn()?;
|
||||||
|
let pid = child.id();
|
||||||
|
|
||||||
|
match timeout(timeout_duration, child.wait_with_output()).await {
|
||||||
|
Ok(result) => result,
|
||||||
|
Err(_) => {
|
||||||
|
// Timeout - force kill the process using system kill command
|
||||||
|
if let Some(process_id) = pid {
|
||||||
|
let _ = tokio::process::Command::new("kill")
|
||||||
|
.args(&["-9", &process_id.to_string()])
|
||||||
|
.output()
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::TimedOut,
|
||||||
|
format!("Command timed out after {} seconds", timeout_secs)
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Base trait for all collectors with direct structured data output
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait Collector: Send + Sync {
|
pub trait Collector: Send + Sync {
|
||||||
/// Collect all metrics this collector provides
|
/// Collect data and populate AgentData directly with status evaluation
|
||||||
async fn collect(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError>;
|
async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError>;
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// CPU efficiency rules for all collectors
|
/// CPU efficiency rules for all collectors
|
||||||
|
|||||||
225
agent/src/collectors/network.rs
Normal file
225
agent/src/collectors/network.rs
Normal file
@ -0,0 +1,225 @@
|
|||||||
|
use async_trait::async_trait;
|
||||||
|
use cm_dashboard_shared::{AgentData, NetworkInterfaceData, Status};
|
||||||
|
use std::process::Command;
|
||||||
|
use tracing::debug;
|
||||||
|
|
||||||
|
use super::{Collector, CollectorError};
|
||||||
|
use crate::config::NetworkConfig;
|
||||||
|
|
||||||
|
/// Network interface collector with physical/virtual classification and link status
|
||||||
|
pub struct NetworkCollector {
|
||||||
|
_config: NetworkConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NetworkCollector {
|
||||||
|
pub fn new(config: NetworkConfig) -> Self {
|
||||||
|
Self { _config: config }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if interface is physical (not virtual)
|
||||||
|
fn is_physical_interface(name: &str) -> bool {
|
||||||
|
// Physical interface patterns
|
||||||
|
matches!(
|
||||||
|
&name[..],
|
||||||
|
s if s.starts_with("eth")
|
||||||
|
|| s.starts_with("ens")
|
||||||
|
|| s.starts_with("enp")
|
||||||
|
|| s.starts_with("wlan")
|
||||||
|
|| s.starts_with("wlp")
|
||||||
|
|| s.starts_with("eno")
|
||||||
|
|| s.starts_with("enx")
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get link status for an interface
|
||||||
|
fn get_link_status(interface: &str) -> Status {
|
||||||
|
let operstate_path = format!("/sys/class/net/{}/operstate", interface);
|
||||||
|
|
||||||
|
match std::fs::read_to_string(&operstate_path) {
|
||||||
|
Ok(state) => {
|
||||||
|
let state = state.trim();
|
||||||
|
match state {
|
||||||
|
"up" => Status::Ok,
|
||||||
|
"down" => Status::Inactive,
|
||||||
|
"unknown" => Status::Warning,
|
||||||
|
_ => Status::Unknown,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => Status::Unknown,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the primary physical interface (the one with default route)
|
||||||
|
fn get_primary_physical_interface() -> Option<String> {
|
||||||
|
match Command::new("timeout").args(["2", "ip", "route", "show", "default"]).output() {
|
||||||
|
Ok(output) if output.status.success() => {
|
||||||
|
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||||
|
// Parse: "default via 192.168.1.1 dev eno1 ..."
|
||||||
|
for line in output_str.lines() {
|
||||||
|
if line.starts_with("default") {
|
||||||
|
if let Some(dev_pos) = line.find(" dev ") {
|
||||||
|
let after_dev = &line[dev_pos + 5..];
|
||||||
|
if let Some(space_pos) = after_dev.find(' ') {
|
||||||
|
let interface = &after_dev[..space_pos];
|
||||||
|
// Only return if it's a physical interface
|
||||||
|
if Self::is_physical_interface(interface) {
|
||||||
|
return Some(interface.to_string());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No space after interface name (end of line)
|
||||||
|
let interface = after_dev.trim();
|
||||||
|
if Self::is_physical_interface(interface) {
|
||||||
|
return Some(interface.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse VLAN configuration from /proc/net/vlan/config
|
||||||
|
/// Returns a map of interface name -> VLAN ID
|
||||||
|
fn parse_vlan_config() -> std::collections::HashMap<String, u16> {
|
||||||
|
let mut vlan_map = std::collections::HashMap::new();
|
||||||
|
|
||||||
|
if let Ok(contents) = std::fs::read_to_string("/proc/net/vlan/config") {
|
||||||
|
for line in contents.lines().skip(2) { // Skip header lines
|
||||||
|
let parts: Vec<&str> = line.split('|').collect();
|
||||||
|
if parts.len() >= 2 {
|
||||||
|
let interface_name = parts[0].trim();
|
||||||
|
let vlan_id_str = parts[1].trim();
|
||||||
|
|
||||||
|
if let Ok(vlan_id) = vlan_id_str.parse::<u16>() {
|
||||||
|
vlan_map.insert(interface_name.to_string(), vlan_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vlan_map
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collect network interfaces using ip command
|
||||||
|
async fn collect_interfaces(&self) -> Vec<NetworkInterfaceData> {
|
||||||
|
let mut interfaces = Vec::new();
|
||||||
|
|
||||||
|
// Parse VLAN configuration
|
||||||
|
let vlan_map = Self::parse_vlan_config();
|
||||||
|
|
||||||
|
match Command::new("timeout").args(["2", "ip", "-j", "addr"]).output() {
|
||||||
|
Ok(output) if output.status.success() => {
|
||||||
|
let json_str = String::from_utf8_lossy(&output.stdout);
|
||||||
|
|
||||||
|
if let Ok(json_data) = serde_json::from_str::<serde_json::Value>(&json_str) {
|
||||||
|
if let Some(ifaces) = json_data.as_array() {
|
||||||
|
for iface in ifaces {
|
||||||
|
let name = iface["ifname"].as_str().unwrap_or("").to_string();
|
||||||
|
|
||||||
|
// Skip loopback, empty names, and ifb* interfaces
|
||||||
|
if name.is_empty() || name == "lo" || name.starts_with("ifb") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse parent interface from @parent notation (e.g., lan@enp0s31f6)
|
||||||
|
let (interface_name, parent_interface) = if let Some(at_pos) = name.find('@') {
|
||||||
|
let (child, parent) = name.split_at(at_pos);
|
||||||
|
(child.to_string(), Some(parent[1..].to_string()))
|
||||||
|
} else {
|
||||||
|
(name.clone(), None)
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut ipv4_addresses = Vec::new();
|
||||||
|
let mut ipv6_addresses = Vec::new();
|
||||||
|
|
||||||
|
// Extract IP addresses
|
||||||
|
if let Some(addr_info) = iface["addr_info"].as_array() {
|
||||||
|
for addr in addr_info {
|
||||||
|
if let Some(family) = addr["family"].as_str() {
|
||||||
|
if let Some(local) = addr["local"].as_str() {
|
||||||
|
match family {
|
||||||
|
"inet" => ipv4_addresses.push(local.to_string()),
|
||||||
|
"inet6" => {
|
||||||
|
// Skip link-local IPv6 addresses (fe80::)
|
||||||
|
if !local.starts_with("fe80:") {
|
||||||
|
ipv6_addresses.push(local.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine if physical and get status
|
||||||
|
let is_physical = Self::is_physical_interface(&interface_name);
|
||||||
|
|
||||||
|
// Only filter out virtual interfaces without IPs
|
||||||
|
// Physical interfaces should always be shown even if down/no IPs
|
||||||
|
if !is_physical && ipv4_addresses.is_empty() && ipv6_addresses.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let link_status = if is_physical {
|
||||||
|
Self::get_link_status(&name)
|
||||||
|
} else {
|
||||||
|
Status::Unknown // Virtual interfaces don't have meaningful link status
|
||||||
|
};
|
||||||
|
|
||||||
|
// Look up VLAN ID from the map (use original name before @ parsing)
|
||||||
|
let vlan_id = vlan_map.get(&name).copied();
|
||||||
|
|
||||||
|
interfaces.push(NetworkInterfaceData {
|
||||||
|
name: interface_name,
|
||||||
|
ipv4_addresses,
|
||||||
|
ipv6_addresses,
|
||||||
|
is_physical,
|
||||||
|
link_status,
|
||||||
|
parent_interface,
|
||||||
|
vlan_id,
|
||||||
|
connection_method: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
debug!("Failed to execute ip command: {}", e);
|
||||||
|
}
|
||||||
|
Ok(output) => {
|
||||||
|
debug!("ip command failed with status: {}", output.status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assign primary physical interface as parent to virtual interfaces without explicit parent
|
||||||
|
let primary_interface = Self::get_primary_physical_interface();
|
||||||
|
if let Some(primary) = primary_interface {
|
||||||
|
for interface in interfaces.iter_mut() {
|
||||||
|
// Only assign parent to virtual interfaces that don't already have one
|
||||||
|
if !interface.is_physical && interface.parent_interface.is_none() {
|
||||||
|
interface.parent_interface = Some(primary.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
interfaces
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Collector for NetworkCollector {
|
||||||
|
async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
|
debug!("Collecting network interface data");
|
||||||
|
|
||||||
|
// Collect all network interfaces
|
||||||
|
let interfaces = self.collect_interfaces().await;
|
||||||
|
|
||||||
|
agent_data.system.network.interfaces = interfaces;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,228 +1,111 @@
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use cm_dashboard_shared::{Metric, MetricValue, Status, StatusTracker};
|
use cm_dashboard_shared::AgentData;
|
||||||
|
use std::fs;
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use super::{Collector, CollectorError};
|
use super::{Collector, CollectorError};
|
||||||
use crate::config::NixOSConfig;
|
|
||||||
|
|
||||||
/// NixOS system information collector
|
/// NixOS system information collector with structured data output
|
||||||
///
|
///
|
||||||
/// Collects NixOS-specific system information including:
|
/// This collector gathers NixOS-specific information like:
|
||||||
/// - NixOS version and build information
|
/// - System generation/build information
|
||||||
/// - Currently active/logged in users
|
/// - Version information
|
||||||
pub struct NixOSCollector {
|
/// - Agent version from Nix store path
|
||||||
}
|
pub struct NixOSCollector;
|
||||||
|
|
||||||
impl NixOSCollector {
|
impl NixOSCollector {
|
||||||
pub fn new(_config: NixOSConfig) -> Self {
|
pub fn new(_config: crate::config::NixOSConfig) -> Self {
|
||||||
Self {}
|
Self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get NixOS build information
|
/// Collect NixOS system information and populate AgentData
|
||||||
fn get_nixos_build_info(&self) -> Result<String, Box<dyn std::error::Error>> {
|
async fn collect_nixos_info(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
// Get nixos-version output directly
|
debug!("Collecting NixOS system information");
|
||||||
let output = Command::new("nixos-version").output()?;
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
return Err("nixos-version command failed".into());
|
|
||||||
}
|
|
||||||
|
|
||||||
let version_line = String::from_utf8_lossy(&output.stdout);
|
// Set hostname (this is universal, not NixOS-specific)
|
||||||
let version = version_line.trim();
|
agent_data.hostname = self.get_hostname().await.unwrap_or_else(|| "unknown".to_string());
|
||||||
|
|
||||||
if version.is_empty() {
|
// Set agent version from environment or Nix store path
|
||||||
return Err("Empty nixos-version output".into());
|
agent_data.agent_version = self.get_agent_version().await;
|
||||||
}
|
|
||||||
|
// Set NixOS build/generation information
|
||||||
// Remove codename part (e.g., "(Warbler)")
|
agent_data.build_version = self.get_nixos_generation().await;
|
||||||
let clean_version = if let Some(pos) = version.find(" (") {
|
|
||||||
version[..pos].to_string()
|
// Set current timestamp
|
||||||
} else {
|
agent_data.timestamp = chrono::Utc::now().timestamp() as u64;
|
||||||
version.to_string()
|
|
||||||
};
|
Ok(())
|
||||||
|
|
||||||
Ok(clean_version)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get agent hash from binary path
|
/// Get system hostname
|
||||||
fn get_agent_hash(&self) -> Result<String, Box<dyn std::error::Error>> {
|
async fn get_hostname(&self) -> Option<String> {
|
||||||
// Get the path of the current executable
|
match fs::read_to_string("/etc/hostname") {
|
||||||
let exe_path = std::env::current_exe()?;
|
Ok(hostname) => Some(hostname.trim().to_string()),
|
||||||
let exe_str = exe_path.to_string_lossy();
|
Err(_) => {
|
||||||
|
// Fallback to hostname command (with 2 second timeout)
|
||||||
// Extract Nix store hash from path like /nix/store/fn804fh332mp8gz06qawminpj20xl25h-cm-dashboard-0.1.0/bin/cm-dashboard-agent
|
match Command::new("timeout").args(["2", "hostname"]).output() {
|
||||||
if let Some(store_path) = exe_str.strip_prefix("/nix/store/") {
|
Ok(output) => Some(String::from_utf8_lossy(&output.stdout).trim().to_string()),
|
||||||
if let Some(dash_pos) = store_path.find('-') {
|
Err(_) => None,
|
||||||
return Ok(store_path[..dash_pos].to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback to "unknown" if not in Nix store
|
|
||||||
Ok("unknown".to_string())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get configuration hash from deployed nix store system
|
|
||||||
fn get_config_hash(&self) -> Result<String, Box<dyn std::error::Error>> {
|
|
||||||
// Read the symlink target of /run/current-system to get nix store path
|
|
||||||
let output = Command::new("readlink")
|
|
||||||
.arg("/run/current-system")
|
|
||||||
.output()?;
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
return Err("readlink command failed".into());
|
|
||||||
}
|
|
||||||
|
|
||||||
let binding = String::from_utf8_lossy(&output.stdout);
|
|
||||||
let store_path = binding.trim();
|
|
||||||
|
|
||||||
// Extract hash from nix store path
|
|
||||||
// Format: /nix/store/HASH-nixos-system-HOSTNAME-VERSION
|
|
||||||
if let Some(hash_part) = store_path.strip_prefix("/nix/store/") {
|
|
||||||
if let Some(hash) = hash_part.split('-').next() {
|
|
||||||
if hash.len() >= 8 {
|
|
||||||
// Return first 8 characters of nix store hash
|
|
||||||
return Ok(hash[..8].to_string());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Err("Could not extract hash from nix store path".into())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get currently active users
|
/// Get agent version from Nix store path or environment
|
||||||
fn get_active_users(&self) -> Result<Vec<String>, Box<dyn std::error::Error>> {
|
async fn get_agent_version(&self) -> String {
|
||||||
let output = Command::new("who").output()?;
|
// Try to extract version from the current executable path (Nix store)
|
||||||
|
if let Ok(current_exe) = std::env::current_exe() {
|
||||||
if !output.status.success() {
|
if let Some(exe_path) = current_exe.to_str() {
|
||||||
return Err("who command failed".into());
|
if exe_path.starts_with("/nix/store/") {
|
||||||
}
|
// Extract version from Nix store path
|
||||||
|
// Path format: /nix/store/hash-cm-dashboard-agent-v0.1.138/bin/cm-dashboard-agent
|
||||||
let who_output = String::from_utf8_lossy(&output.stdout);
|
if let Some(store_part) = exe_path.strip_prefix("/nix/store/") {
|
||||||
let mut users = std::collections::HashSet::new();
|
if let Some(dash_pos) = store_part.find('-') {
|
||||||
|
let package_part = &store_part[dash_pos + 1..];
|
||||||
for line in who_output.lines() {
|
if let Some(bin_pos) = package_part.find("/bin/") {
|
||||||
if let Some(username) = line.split_whitespace().next() {
|
let package_name = &package_part[..bin_pos];
|
||||||
if !username.is_empty() {
|
// Extract version from package name
|
||||||
users.insert(username.to_string());
|
if let Some(version_start) = package_name.rfind("-v") {
|
||||||
|
return package_name[version_start + 1..].to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(users.into_iter().collect())
|
// Fallback to environment variable or default
|
||||||
|
std::env::var("CM_DASHBOARD_VERSION").unwrap_or_else(|_| "unknown".to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get NixOS system generation (build) information from git commit
|
||||||
|
async fn get_nixos_generation(&self) -> Option<String> {
|
||||||
|
// Try to read git commit hash from file written during rebuild
|
||||||
|
let commit_file = "/var/lib/cm-dashboard/git-commit";
|
||||||
|
match fs::read_to_string(commit_file) {
|
||||||
|
Ok(content) => {
|
||||||
|
let commit_hash = content.trim();
|
||||||
|
if commit_hash.len() >= 7 {
|
||||||
|
debug!("Found git commit hash: {}", commit_hash);
|
||||||
|
Some(commit_hash.to_string())
|
||||||
|
} else {
|
||||||
|
debug!("Git commit hash too short: {}", commit_hash);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
debug!("Failed to read git commit file {}: {}", commit_file, e);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl Collector for NixOSCollector {
|
impl Collector for NixOSCollector {
|
||||||
|
async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
async fn collect(&self, _status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
|
self.collect_nixos_info(agent_data).await
|
||||||
debug!("Collecting NixOS system information");
|
|
||||||
let mut metrics = Vec::new();
|
|
||||||
let timestamp = chrono::Utc::now().timestamp() as u64;
|
|
||||||
|
|
||||||
// Collect NixOS build information
|
|
||||||
match self.get_nixos_build_info() {
|
|
||||||
Ok(build_info) => {
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "system_nixos_build".to_string(),
|
|
||||||
value: MetricValue::String(build_info),
|
|
||||||
unit: None,
|
|
||||||
description: Some("NixOS build information".to_string()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
debug!("Failed to get NixOS build info: {}", e);
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "system_nixos_build".to_string(),
|
|
||||||
value: MetricValue::String("unknown".to_string()),
|
|
||||||
unit: None,
|
|
||||||
description: Some("NixOS build (failed to detect)".to_string()),
|
|
||||||
status: Status::Unknown,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Collect active users
|
|
||||||
match self.get_active_users() {
|
|
||||||
Ok(users) => {
|
|
||||||
let users_str = users.join(", ");
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "system_active_users".to_string(),
|
|
||||||
value: MetricValue::String(users_str),
|
|
||||||
unit: None,
|
|
||||||
description: Some("Currently active users".to_string()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
debug!("Failed to get active users: {}", e);
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "system_active_users".to_string(),
|
|
||||||
value: MetricValue::String("unknown".to_string()),
|
|
||||||
unit: None,
|
|
||||||
description: Some("Active users (failed to detect)".to_string()),
|
|
||||||
status: Status::Unknown,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Collect config hash
|
|
||||||
match self.get_config_hash() {
|
|
||||||
Ok(hash) => {
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "system_config_hash".to_string(),
|
|
||||||
value: MetricValue::String(hash),
|
|
||||||
unit: None,
|
|
||||||
description: Some("NixOS deployed configuration hash".to_string()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
debug!("Failed to get config hash: {}", e);
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "system_config_hash".to_string(),
|
|
||||||
value: MetricValue::String("unknown".to_string()),
|
|
||||||
unit: None,
|
|
||||||
description: Some("Deployed config hash (failed to detect)".to_string()),
|
|
||||||
status: Status::Unknown,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Collect agent hash
|
|
||||||
match self.get_agent_hash() {
|
|
||||||
Ok(hash) => {
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "system_agent_hash".to_string(),
|
|
||||||
value: MetricValue::String(hash),
|
|
||||||
unit: None,
|
|
||||||
description: Some("Agent Nix store hash".to_string()),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
debug!("Failed to get agent hash: {}", e);
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: "system_agent_hash".to_string(),
|
|
||||||
value: MetricValue::String("unknown".to_string()),
|
|
||||||
unit: None,
|
|
||||||
description: Some("Agent hash (failed to detect)".to_string()),
|
|
||||||
status: Status::Unknown,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
debug!("Collected {} NixOS metrics", metrics.len());
|
|
||||||
Ok(metrics)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1,15 +1,15 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use cm_dashboard_shared::{Metric, MetricValue, Status, StatusTracker};
|
use cm_dashboard_shared::{AgentData, ServiceData, SubServiceData, SubServiceMetric, Status};
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
use std::sync::RwLock;
|
use std::sync::RwLock;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use tracing::debug;
|
use tracing::{debug, info};
|
||||||
|
|
||||||
use super::{Collector, CollectorError};
|
use super::{Collector, CollectorError};
|
||||||
use crate::config::SystemdConfig;
|
use crate::config::SystemdConfig;
|
||||||
|
|
||||||
/// Systemd collector for monitoring systemd services
|
/// Systemd collector for monitoring systemd services with structured data output
|
||||||
pub struct SystemdCollector {
|
pub struct SystemdCollector {
|
||||||
/// Cached state with thread-safe interior mutability
|
/// Cached state with thread-safe interior mutability
|
||||||
state: RwLock<ServiceCacheState>,
|
state: RwLock<ServiceCacheState>,
|
||||||
@ -18,48 +18,288 @@ pub struct SystemdCollector {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Internal state for service caching
|
/// Internal state for service caching
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Clone)]
|
||||||
struct ServiceCacheState {
|
struct ServiceCacheState {
|
||||||
|
/// Last collection time for performance tracking
|
||||||
|
last_collection: Option<Instant>,
|
||||||
|
/// Cached complete service data with sub-services
|
||||||
|
cached_service_data: Vec<ServiceData>,
|
||||||
/// Interesting services to monitor (cached after discovery)
|
/// Interesting services to monitor (cached after discovery)
|
||||||
monitored_services: Vec<String>,
|
monitored_services: Vec<String>,
|
||||||
/// Cached service status information from discovery
|
/// Cached service status information from discovery
|
||||||
service_status_cache: std::collections::HashMap<String, ServiceStatusInfo>,
|
service_status_cache: std::collections::HashMap<String, ServiceStatusInfo>,
|
||||||
/// Last time services were discovered
|
/// Last time services were discovered
|
||||||
last_discovery_time: Option<Instant>,
|
last_discovery_time: Option<Instant>,
|
||||||
/// How often to rediscover services (5 minutes)
|
/// How often to rediscover services (from config)
|
||||||
discovery_interval_seconds: u64,
|
discovery_interval_seconds: u64,
|
||||||
/// Cached nginx site latency metrics
|
/// Cached nginx site latency metrics
|
||||||
nginx_site_metrics: Vec<Metric>,
|
nginx_site_metrics: Vec<(String, f32)>,
|
||||||
/// Last time nginx sites were checked
|
/// Last time nginx sites were checked
|
||||||
last_nginx_check_time: Option<Instant>,
|
last_nginx_check_time: Option<Instant>,
|
||||||
/// How often to check nginx site latency (30 seconds)
|
/// How often to check nginx site latency (configurable)
|
||||||
nginx_check_interval_seconds: u64,
|
nginx_check_interval_seconds: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Cached service status information from systemctl list-units
|
/// Cached service status information from systemctl list-units
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
struct ServiceStatusInfo {
|
struct ServiceStatusInfo {
|
||||||
load_state: String,
|
|
||||||
active_state: String,
|
active_state: String,
|
||||||
sub_state: String,
|
memory_bytes: Option<u64>,
|
||||||
|
restart_count: Option<u32>,
|
||||||
|
start_timestamp: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SystemdCollector {
|
impl SystemdCollector {
|
||||||
pub fn new(config: SystemdConfig) -> Self {
|
pub fn new(config: SystemdConfig) -> Self {
|
||||||
|
let state = ServiceCacheState {
|
||||||
|
last_collection: None,
|
||||||
|
cached_service_data: Vec::new(),
|
||||||
|
monitored_services: Vec::new(),
|
||||||
|
service_status_cache: std::collections::HashMap::new(),
|
||||||
|
last_discovery_time: None,
|
||||||
|
discovery_interval_seconds: config.interval_seconds,
|
||||||
|
nginx_site_metrics: Vec::new(),
|
||||||
|
last_nginx_check_time: None,
|
||||||
|
nginx_check_interval_seconds: config.nginx_check_interval_seconds,
|
||||||
|
};
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
state: RwLock::new(ServiceCacheState {
|
state: RwLock::new(state),
|
||||||
monitored_services: Vec::new(),
|
|
||||||
service_status_cache: std::collections::HashMap::new(),
|
|
||||||
last_discovery_time: None,
|
|
||||||
discovery_interval_seconds: config.interval_seconds,
|
|
||||||
nginx_site_metrics: Vec::new(),
|
|
||||||
last_nginx_check_time: None,
|
|
||||||
nginx_check_interval_seconds: 30, // 30 seconds for nginx sites
|
|
||||||
}),
|
|
||||||
config,
|
config,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Collect service data and populate AgentData
|
||||||
|
async fn collect_service_data(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
|
let start_time = Instant::now();
|
||||||
|
debug!("Collecting systemd services metrics");
|
||||||
|
|
||||||
|
// Get cached services (discovery only happens when needed)
|
||||||
|
let monitored_services = match self.get_monitored_services() {
|
||||||
|
Ok(services) => services,
|
||||||
|
Err(e) => {
|
||||||
|
debug!("Failed to get monitored services: {}", e);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Collect service data for each monitored service
|
||||||
|
let mut complete_service_data = Vec::new();
|
||||||
|
for service_name in &monitored_services {
|
||||||
|
match self.get_service_status(service_name) {
|
||||||
|
Ok(status_info) => {
|
||||||
|
let mut sub_services = Vec::new();
|
||||||
|
|
||||||
|
// Calculate uptime if we have start timestamp
|
||||||
|
let uptime_seconds = status_info.start_timestamp.and_then(|start| {
|
||||||
|
let now = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.ok()?
|
||||||
|
.as_secs();
|
||||||
|
Some(now.saturating_sub(start))
|
||||||
|
});
|
||||||
|
|
||||||
|
// Sub-service metrics for specific services (always include cached results)
|
||||||
|
if service_name.contains("nginx") && status_info.active_state == "active" {
|
||||||
|
let nginx_sites = self.get_nginx_site_metrics();
|
||||||
|
for (site_name, latency_ms) in nginx_sites {
|
||||||
|
let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms {
|
||||||
|
"active"
|
||||||
|
} else {
|
||||||
|
"failed"
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut metrics = Vec::new();
|
||||||
|
metrics.push(SubServiceMetric {
|
||||||
|
label: "latency_ms".to_string(),
|
||||||
|
value: latency_ms,
|
||||||
|
unit: Some("ms".to_string()),
|
||||||
|
});
|
||||||
|
|
||||||
|
sub_services.push(SubServiceData {
|
||||||
|
name: site_name.clone(),
|
||||||
|
service_status: self.calculate_service_status(&site_name, &site_status),
|
||||||
|
metrics,
|
||||||
|
service_type: "nginx_site".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if service_name.contains("docker") && status_info.active_state == "active" {
|
||||||
|
let docker_containers = self.get_docker_containers();
|
||||||
|
for (container_name, container_status) in docker_containers {
|
||||||
|
// For now, docker containers have no additional metrics
|
||||||
|
// Future: could add memory_mb, cpu_percent, restart_count, etc.
|
||||||
|
let metrics = Vec::new();
|
||||||
|
|
||||||
|
sub_services.push(SubServiceData {
|
||||||
|
name: container_name.clone(),
|
||||||
|
service_status: self.calculate_service_status(&container_name, &container_status),
|
||||||
|
metrics,
|
||||||
|
service_type: "container".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add Docker images
|
||||||
|
let docker_images = self.get_docker_images();
|
||||||
|
for (image_name, _image_status, image_size_mb) in docker_images {
|
||||||
|
let metrics = Vec::new();
|
||||||
|
|
||||||
|
sub_services.push(SubServiceData {
|
||||||
|
name: format!("{} size: {:.1} MB", image_name, image_size_mb),
|
||||||
|
service_status: Status::Info, // Informational only, no status icon
|
||||||
|
metrics,
|
||||||
|
service_type: "image".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if service_name == "openvpn-vpn-download" && status_info.active_state == "active" {
|
||||||
|
// Add VPN route
|
||||||
|
if let Some(external_ip) = self.get_vpn_external_ip() {
|
||||||
|
let metrics = Vec::new();
|
||||||
|
|
||||||
|
sub_services.push(SubServiceData {
|
||||||
|
name: format!("route: {}", external_ip),
|
||||||
|
service_status: Status::Info,
|
||||||
|
metrics,
|
||||||
|
service_type: "vpn_route".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add torrent stats
|
||||||
|
if let Some((active_count, download_mbps, upload_mbps)) = self.get_qbittorrent_stats() {
|
||||||
|
let metrics = Vec::new();
|
||||||
|
|
||||||
|
sub_services.push(SubServiceData {
|
||||||
|
name: format!("{} active, ↓ {:.1} MB/s, ↑ {:.1} MB/s", active_count, download_mbps, upload_mbps),
|
||||||
|
service_status: Status::Info,
|
||||||
|
metrics,
|
||||||
|
service_type: "torrent_stats".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add active torrent copy status for each copy operation
|
||||||
|
for torrent_name in self.get_active_torrent_copies() {
|
||||||
|
let metrics = Vec::new();
|
||||||
|
|
||||||
|
sub_services.push(SubServiceData {
|
||||||
|
name: format!("Copy: {}", torrent_name),
|
||||||
|
service_status: Status::Info,
|
||||||
|
metrics,
|
||||||
|
service_type: "torrent_copy".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if service_name == "nftables" && status_info.active_state == "active" {
|
||||||
|
let (tcp_ports, udp_ports) = self.get_nftables_open_ports();
|
||||||
|
|
||||||
|
if !tcp_ports.is_empty() {
|
||||||
|
let metrics = Vec::new();
|
||||||
|
sub_services.push(SubServiceData {
|
||||||
|
name: format!("wan tcp: {}", tcp_ports),
|
||||||
|
service_status: Status::Info,
|
||||||
|
metrics,
|
||||||
|
service_type: "firewall_port".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if !udp_ports.is_empty() {
|
||||||
|
let metrics = Vec::new();
|
||||||
|
sub_services.push(SubServiceData {
|
||||||
|
name: format!("wan udp: {}", udp_ports),
|
||||||
|
service_status: Status::Info,
|
||||||
|
metrics,
|
||||||
|
service_type: "firewall_port".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if service_name == "tailscaled" && status_info.active_state == "active" {
|
||||||
|
// Add Tailscale peers with their connection methods as sub-services
|
||||||
|
let peers = self.get_tailscale_peers();
|
||||||
|
for (peer_name, conn_method) in peers {
|
||||||
|
let metrics = Vec::new();
|
||||||
|
sub_services.push(SubServiceData {
|
||||||
|
name: format!("{}: {}", peer_name, conn_method),
|
||||||
|
service_status: Status::Info,
|
||||||
|
metrics,
|
||||||
|
service_type: "tailscale_peer".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if service_name == "nfs-server" && status_info.active_state == "active" {
|
||||||
|
// Add NFS exports as sub-services
|
||||||
|
let exports = self.get_nfs_exports();
|
||||||
|
for (export_path, info) in exports {
|
||||||
|
let display = if !info.is_empty() {
|
||||||
|
format!("{} {}", export_path, info)
|
||||||
|
} else {
|
||||||
|
export_path
|
||||||
|
};
|
||||||
|
sub_services.push(SubServiceData {
|
||||||
|
name: display,
|
||||||
|
service_status: Status::Info,
|
||||||
|
metrics: Vec::new(),
|
||||||
|
service_type: "nfs_export".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (service_name == "smbd" || service_name == "samba-smbd") && status_info.active_state == "active" {
|
||||||
|
// Add SMB shares as sub-services
|
||||||
|
let shares = self.get_smb_shares();
|
||||||
|
for (share_name, share_path) in shares {
|
||||||
|
let metrics = Vec::new();
|
||||||
|
sub_services.push(SubServiceData {
|
||||||
|
name: format!("{}: {}", share_name, share_path),
|
||||||
|
service_status: Status::Info,
|
||||||
|
metrics,
|
||||||
|
service_type: "smb_share".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create complete service data
|
||||||
|
let service_data = ServiceData {
|
||||||
|
name: service_name.clone(),
|
||||||
|
user_stopped: false, // TODO: Integrate with service tracker
|
||||||
|
service_status: self.calculate_service_status(service_name, &status_info.active_state),
|
||||||
|
sub_services,
|
||||||
|
memory_bytes: status_info.memory_bytes,
|
||||||
|
restart_count: status_info.restart_count,
|
||||||
|
uptime_seconds,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add to AgentData and cache
|
||||||
|
agent_data.services.push(service_data.clone());
|
||||||
|
complete_service_data.push(service_data);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
debug!("Failed to get status for service {}: {}", service_name, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort services alphabetically by name
|
||||||
|
agent_data.services.sort_by(|a, b| a.name.cmp(&b.name));
|
||||||
|
complete_service_data.sort_by(|a, b| a.name.cmp(&b.name));
|
||||||
|
|
||||||
|
// Update cached state
|
||||||
|
{
|
||||||
|
let mut state = self.state.write().unwrap();
|
||||||
|
state.last_collection = Some(start_time);
|
||||||
|
state.cached_service_data = complete_service_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
let elapsed = start_time.elapsed();
|
||||||
|
debug!("Systemd collection completed in {:?} with {} services", elapsed, agent_data.services.len());
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Get monitored services, discovering them if needed or cache is expired
|
/// Get monitored services, discovering them if needed or cache is expired
|
||||||
fn get_monitored_services(&self) -> Result<Vec<String>> {
|
fn get_monitored_services(&self) -> Result<Vec<String>> {
|
||||||
// Check if we need discovery without holding the lock
|
// Check if we need discovery without holding the lock
|
||||||
@ -76,25 +316,19 @@ impl SystemdCollector {
|
|||||||
|
|
||||||
if needs_discovery {
|
if needs_discovery {
|
||||||
debug!("Discovering systemd services (cache expired or first run)");
|
debug!("Discovering systemd services (cache expired or first run)");
|
||||||
// Call discover_services_internal which doesn't update state
|
|
||||||
match self.discover_services_internal() {
|
match self.discover_services_internal() {
|
||||||
Ok((services, status_cache)) => {
|
Ok((services, status_cache)) => {
|
||||||
// Update state with discovered services in a separate scope
|
|
||||||
if let Ok(mut state) = self.state.write() {
|
if let Ok(mut state) = self.state.write() {
|
||||||
state.monitored_services = services.clone();
|
state.monitored_services = services.clone();
|
||||||
state.service_status_cache = status_cache;
|
state.service_status_cache = status_cache;
|
||||||
state.last_discovery_time = Some(Instant::now());
|
state.last_discovery_time = Some(Instant::now());
|
||||||
debug!(
|
debug!("Auto-discovered {} services to monitor: {:?}",
|
||||||
"Auto-discovered {} services to monitor: {:?}",
|
state.monitored_services.len(), state.monitored_services);
|
||||||
state.monitored_services.len(),
|
|
||||||
state.monitored_services
|
|
||||||
);
|
|
||||||
return Ok(services);
|
return Ok(services);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
debug!("Failed to discover services, using cached list: {}", e);
|
debug!("Failed to discover services, using cached list: {}", e);
|
||||||
// Continue with existing cached services if discovery fails
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -104,8 +338,8 @@ impl SystemdCollector {
|
|||||||
Ok(state.monitored_services.clone())
|
Ok(state.monitored_services.clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get nginx site metrics, checking them if cache is expired
|
/// Get nginx site metrics, checking them if cache is expired (like old working version)
|
||||||
fn get_nginx_site_metrics(&self) -> Vec<Metric> {
|
fn get_nginx_site_metrics(&self) -> Vec<(String, f32)> {
|
||||||
let mut state = self.state.write().unwrap();
|
let mut state = self.state.write().unwrap();
|
||||||
|
|
||||||
// Check if we need to refresh nginx site metrics
|
// Check if we need to refresh nginx site metrics
|
||||||
@ -120,11 +354,7 @@ impl SystemdCollector {
|
|||||||
if needs_refresh {
|
if needs_refresh {
|
||||||
// Only check nginx sites if nginx service is active
|
// Only check nginx sites if nginx service is active
|
||||||
if state.monitored_services.iter().any(|s| s.contains("nginx")) {
|
if state.monitored_services.iter().any(|s| s.contains("nginx")) {
|
||||||
debug!(
|
let fresh_metrics = self.get_nginx_sites_internal();
|
||||||
"Refreshing nginx site latency metrics (interval: {}s)",
|
|
||||||
state.nginx_check_interval_seconds
|
|
||||||
);
|
|
||||||
let fresh_metrics = self.get_nginx_sites();
|
|
||||||
state.nginx_site_metrics = fresh_metrics;
|
state.nginx_site_metrics = fresh_metrics;
|
||||||
state.last_nginx_check_time = Some(Instant::now());
|
state.last_nginx_check_time = Some(Instant::now());
|
||||||
}
|
}
|
||||||
@ -133,103 +363,158 @@ impl SystemdCollector {
|
|||||||
state.nginx_site_metrics.clone()
|
state.nginx_site_metrics.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Auto-discover interesting services to monitor (internal version that doesn't update state)
|
/// Auto-discover interesting services to monitor
|
||||||
fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
|
fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
|
||||||
debug!("Starting systemd service discovery with status caching");
|
// First: Get all service unit files (with 3 second timeout)
|
||||||
// Get all services (includes inactive, running, failed - everything)
|
let unit_files_output = Command::new("timeout")
|
||||||
let units_output = Command::new("systemctl")
|
.args(&["3", "systemctl", "list-unit-files", "--type=service", "--no-pager", "--plain"])
|
||||||
.arg("list-units")
|
|
||||||
.arg("--type=service")
|
|
||||||
.arg("--all")
|
|
||||||
.arg("--no-pager")
|
|
||||||
.arg("--plain")
|
|
||||||
.output()?;
|
.output()?;
|
||||||
|
|
||||||
if !units_output.status.success() {
|
if !unit_files_output.status.success() {
|
||||||
return Err(anyhow::anyhow!("systemctl system command failed"));
|
return Err(anyhow::anyhow!("systemctl list-unit-files command failed"));
|
||||||
}
|
}
|
||||||
|
|
||||||
let units_str = String::from_utf8(units_output.stdout)?;
|
// Second: Get runtime status of all units (with 3 second timeout)
|
||||||
|
let units_status_output = Command::new("timeout")
|
||||||
|
.args(&["3", "systemctl", "list-units", "--type=service", "--all", "--no-pager", "--plain"])
|
||||||
|
.output()?;
|
||||||
|
|
||||||
|
if !units_status_output.status.success() {
|
||||||
|
return Err(anyhow::anyhow!("systemctl list-units command failed"));
|
||||||
|
}
|
||||||
|
|
||||||
|
let unit_files_str = String::from_utf8(unit_files_output.stdout)?;
|
||||||
|
let units_status_str = String::from_utf8(units_status_output.stdout)?;
|
||||||
let mut services = Vec::new();
|
let mut services = Vec::new();
|
||||||
|
|
||||||
// Use configuration instead of hardcoded values
|
|
||||||
let excluded_services = &self.config.excluded_services;
|
let excluded_services = &self.config.excluded_services;
|
||||||
let service_name_filters = &self.config.service_name_filters;
|
let service_name_filters = &self.config.service_name_filters;
|
||||||
|
|
||||||
// Parse all services and cache their status information
|
// Parse all service unit files
|
||||||
let mut all_service_names = std::collections::HashSet::new();
|
let mut all_service_names = std::collections::HashSet::new();
|
||||||
let mut status_cache = std::collections::HashMap::new();
|
for line in unit_files_str.lines() {
|
||||||
|
|
||||||
for line in units_str.lines() {
|
|
||||||
let fields: Vec<&str> = line.split_whitespace().collect();
|
let fields: Vec<&str> = line.split_whitespace().collect();
|
||||||
if fields.len() >= 4 && fields[0].ends_with(".service") {
|
if fields.len() >= 2 && fields[0].ends_with(".service") {
|
||||||
let service_name = fields[0].trim_end_matches(".service");
|
let service_name = fields[0].trim_end_matches(".service");
|
||||||
|
|
||||||
// Extract status information from systemctl list-units output
|
|
||||||
let load_state = fields.get(1).unwrap_or(&"unknown").to_string();
|
|
||||||
let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
|
|
||||||
let sub_state = fields.get(3).unwrap_or(&"unknown").to_string();
|
|
||||||
|
|
||||||
// Cache the status information
|
|
||||||
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
|
||||||
load_state: load_state.clone(),
|
|
||||||
active_state: active_state.clone(),
|
|
||||||
sub_state: sub_state.clone(),
|
|
||||||
});
|
|
||||||
|
|
||||||
all_service_names.insert(service_name.to_string());
|
all_service_names.insert(service_name.to_string());
|
||||||
debug!("Parsed service: {} (load:{}, active:{}, sub:{})", service_name, load_state, active_state, sub_state);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse runtime status for all units
|
||||||
|
let mut status_cache = std::collections::HashMap::new();
|
||||||
|
for line in units_status_str.lines() {
|
||||||
|
let fields: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if fields.len() >= 4 && fields[0].ends_with(".service") {
|
||||||
|
let service_name = fields[0].trim_end_matches(".service");
|
||||||
|
let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
|
||||||
|
|
||||||
// Now process all discovered services
|
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||||
for service_name in &all_service_names {
|
active_state,
|
||||||
debug!("Processing service: '{}'", service_name);
|
memory_bytes: None,
|
||||||
|
restart_count: None,
|
||||||
// Skip excluded services first
|
start_timestamp: None,
|
||||||
let mut is_excluded = false;
|
});
|
||||||
for excluded in excluded_services {
|
}
|
||||||
if service_name.contains(excluded) {
|
|
||||||
debug!(
|
|
||||||
"EXCLUDING service '{}' because it matches pattern '{}'",
|
|
||||||
service_name, excluded
|
|
||||||
);
|
|
||||||
is_excluded = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if is_excluded {
|
|
||||||
debug!("Skipping excluded service: '{}'", service_name);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if this service matches our filter patterns (supports wildcards)
|
|
||||||
for pattern in service_name_filters {
|
|
||||||
if self.matches_pattern(service_name, pattern) {
|
|
||||||
debug!(
|
|
||||||
"INCLUDING service '{}' because it matches pattern '{}'",
|
|
||||||
service_name, pattern
|
|
||||||
);
|
|
||||||
services.push(service_name.to_string());
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!("Service discovery completed: found {} matching services: {:?}", services.len(), services);
|
// For services found in unit files but not in runtime status, set default inactive status
|
||||||
if services.is_empty() {
|
for service_name in &all_service_names {
|
||||||
debug!("No services found matching the configured filters - this may indicate a parsing issue");
|
if !status_cache.contains_key(service_name) {
|
||||||
|
status_cache.insert(service_name.to_string(), ServiceStatusInfo {
|
||||||
|
active_state: "inactive".to_string(),
|
||||||
|
memory_bytes: None,
|
||||||
|
restart_count: None,
|
||||||
|
start_timestamp: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process all discovered services and apply filters
|
||||||
|
for service_name in &all_service_names {
|
||||||
|
// Skip excluded services first
|
||||||
|
let mut is_excluded = false;
|
||||||
|
for excluded in excluded_services {
|
||||||
|
if service_name.contains(excluded) {
|
||||||
|
is_excluded = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if is_excluded {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this service matches our filter patterns (supports wildcards)
|
||||||
|
for pattern in service_name_filters {
|
||||||
|
if self.matches_pattern(service_name, pattern) {
|
||||||
|
services.push(service_name.to_string());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((services, status_cache))
|
Ok((services, status_cache))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get service status with detailed metrics from systemctl
|
||||||
|
fn get_service_status(&self, service: &str) -> Result<ServiceStatusInfo> {
|
||||||
|
// Always fetch fresh data to get detailed metrics (memory, restarts, uptime)
|
||||||
|
// Note: Cache in service_status_cache only has basic active_state from discovery,
|
||||||
|
// with all detailed metrics set to None. We need fresh systemctl show data.
|
||||||
|
|
||||||
|
let output = Command::new("timeout")
|
||||||
|
.args(&[
|
||||||
|
"2",
|
||||||
|
"systemctl",
|
||||||
|
"show",
|
||||||
|
&format!("{}.service", service),
|
||||||
|
"--property=LoadState,ActiveState,SubState,MemoryCurrent,NRestarts,ExecMainStartTimestamp"
|
||||||
|
])
|
||||||
|
.output()?;
|
||||||
|
|
||||||
|
let output_str = String::from_utf8(output.stdout)?;
|
||||||
|
|
||||||
|
// Parse properties
|
||||||
|
let mut active_state = String::new();
|
||||||
|
let mut memory_bytes = None;
|
||||||
|
let mut restart_count = None;
|
||||||
|
let mut start_timestamp = None;
|
||||||
|
|
||||||
|
for line in output_str.lines() {
|
||||||
|
if let Some(value) = line.strip_prefix("ActiveState=") {
|
||||||
|
active_state = value.to_string();
|
||||||
|
} else if let Some(value) = line.strip_prefix("MemoryCurrent=") {
|
||||||
|
if value != "[not set]" {
|
||||||
|
memory_bytes = value.parse().ok();
|
||||||
|
}
|
||||||
|
} else if let Some(value) = line.strip_prefix("NRestarts=") {
|
||||||
|
restart_count = value.parse().ok();
|
||||||
|
} else if let Some(value) = line.strip_prefix("ExecMainStartTimestamp=") {
|
||||||
|
if value != "[not set]" && !value.is_empty() {
|
||||||
|
// Parse timestamp to seconds since epoch
|
||||||
|
if let Ok(output) = Command::new("date")
|
||||||
|
.args(&["+%s", "-d", value])
|
||||||
|
.output()
|
||||||
|
{
|
||||||
|
if let Ok(timestamp_str) = String::from_utf8(output.stdout) {
|
||||||
|
start_timestamp = timestamp_str.trim().parse().ok();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(ServiceStatusInfo {
|
||||||
|
active_state,
|
||||||
|
memory_bytes,
|
||||||
|
restart_count,
|
||||||
|
start_timestamp,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
/// Check if service name matches pattern (supports wildcards like nginx*)
|
/// Check if service name matches pattern (supports wildcards like nginx*)
|
||||||
fn matches_pattern(&self, service_name: &str, pattern: &str) -> bool {
|
fn matches_pattern(&self, service_name: &str, pattern: &str) -> bool {
|
||||||
if pattern.contains('*') {
|
if pattern.contains('*') {
|
||||||
// Wildcard pattern matching
|
|
||||||
if pattern.ends_with('*') {
|
if pattern.ends_with('*') {
|
||||||
// Pattern like "nginx*" - match if service starts with "nginx"
|
// Pattern like "nginx*" - match if service starts with "nginx"
|
||||||
let prefix = &pattern[..pattern.len() - 1];
|
let prefix = &pattern[..pattern.len() - 1];
|
||||||
@ -243,406 +528,94 @@ impl SystemdCollector {
|
|||||||
self.simple_glob_match(service_name, pattern)
|
self.simple_glob_match(service_name, pattern)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Exact match (existing behavior)
|
// Exact match
|
||||||
service_name == pattern
|
service_name == pattern
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Simple glob pattern matching for patterns with * in middle
|
/// Simple glob matching for patterns with * in the middle
|
||||||
fn simple_glob_match(&self, text: &str, pattern: &str) -> bool {
|
fn simple_glob_match(&self, text: &str, pattern: &str) -> bool {
|
||||||
let parts: Vec<&str> = pattern.split('*').collect();
|
let parts: Vec<&str> = pattern.split('*').collect();
|
||||||
if parts.is_empty() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut pos = 0;
|
let mut pos = 0;
|
||||||
for (i, part) in parts.iter().enumerate() {
|
|
||||||
|
for part in parts {
|
||||||
if part.is_empty() {
|
if part.is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if let Some(found_pos) = text[pos..].find(part) {
|
||||||
if i == 0 {
|
pos += found_pos + part.len();
|
||||||
// First part must match at start
|
|
||||||
if !text[pos..].starts_with(part) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
pos += part.len();
|
|
||||||
} else if i == parts.len() - 1 {
|
|
||||||
// Last part must match at end
|
|
||||||
return text[pos..].ends_with(part);
|
|
||||||
} else {
|
} else {
|
||||||
// Middle part must be found somewhere
|
return false;
|
||||||
if let Some(found_pos) = text[pos..].find(part) {
|
|
||||||
pos += found_pos + part.len();
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get service status from cache (if available) or fallback to systemctl
|
/// Calculate service status, taking user-stopped services into account
|
||||||
fn get_service_status(&self, service: &str) -> Result<(String, String)> {
|
fn calculate_service_status(&self, service_name: &str, active_status: &str) -> Status {
|
||||||
// Try to get status from cache first
|
|
||||||
if let Ok(state) = self.state.read() {
|
|
||||||
if let Some(cached_info) = state.service_status_cache.get(service) {
|
|
||||||
let active_status = cached_info.active_state.clone();
|
|
||||||
let detailed_info = format!(
|
|
||||||
"LoadState={}\nActiveState={}\nSubState={}",
|
|
||||||
cached_info.load_state,
|
|
||||||
cached_info.active_state,
|
|
||||||
cached_info.sub_state
|
|
||||||
);
|
|
||||||
return Ok((active_status, detailed_info));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback to systemctl if not in cache (shouldn't happen during normal operation)
|
|
||||||
debug!("Service '{}' not found in cache, falling back to systemctl", service);
|
|
||||||
let output = Command::new("systemctl")
|
|
||||||
.arg("is-active")
|
|
||||||
.arg(format!("{}.service", service))
|
|
||||||
.output()?;
|
|
||||||
|
|
||||||
let active_status = String::from_utf8(output.stdout)?.trim().to_string();
|
|
||||||
|
|
||||||
// Get more detailed info
|
|
||||||
let output = Command::new("systemctl")
|
|
||||||
.arg("show")
|
|
||||||
.arg(format!("{}.service", service))
|
|
||||||
.arg("--property=LoadState,ActiveState,SubState")
|
|
||||||
.output()?;
|
|
||||||
|
|
||||||
let detailed_info = String::from_utf8(output.stdout)?;
|
|
||||||
Ok((active_status, detailed_info))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Calculate service status
|
|
||||||
fn calculate_service_status(&self, active_status: &str) -> Status {
|
|
||||||
match active_status.to_lowercase().as_str() {
|
match active_status.to_lowercase().as_str() {
|
||||||
"active" => Status::Ok,
|
"active" => Status::Ok,
|
||||||
"inactive" | "dead" => Status::Warning,
|
"inactive" | "dead" => {
|
||||||
|
debug!("Service '{}' is inactive - treating as Inactive status", service_name);
|
||||||
|
Status::Inactive
|
||||||
|
},
|
||||||
"failed" | "error" => Status::Critical,
|
"failed" | "error" => Status::Critical,
|
||||||
"activating" | "deactivating" | "reloading" | "start" | "stop" | "restart" => Status::Pending,
|
"activating" | "deactivating" | "reloading" | "starting" | "stopping" => {
|
||||||
|
debug!("Service '{}' is transitioning - treating as Pending", service_name);
|
||||||
|
Status::Pending
|
||||||
|
},
|
||||||
_ => Status::Unknown,
|
_ => Status::Unknown,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get service memory usage (if available)
|
/// Check if service collection cache should be updated
|
||||||
fn get_service_memory(&self, service: &str) -> Option<f32> {
|
fn should_update_cache(&self) -> bool {
|
||||||
let output = Command::new("systemctl")
|
let state = self.state.read().unwrap();
|
||||||
.arg("show")
|
|
||||||
.arg(format!("{}.service", service))
|
|
||||||
.arg("--property=MemoryCurrent")
|
|
||||||
.output()
|
|
||||||
.ok()?;
|
|
||||||
|
|
||||||
let output_str = String::from_utf8(output.stdout).ok()?;
|
match state.last_collection {
|
||||||
for line in output_str.lines() {
|
None => true,
|
||||||
if line.starts_with("MemoryCurrent=") {
|
Some(last) => {
|
||||||
let memory_str = line.trim_start_matches("MemoryCurrent=");
|
let cache_duration = std::time::Duration::from_secs(30);
|
||||||
if let Ok(memory_bytes) = memory_str.parse::<u64>() {
|
last.elapsed() > cache_duration
|
||||||
return Some(memory_bytes as f32 / (1024.0 * 1024.0)); // Convert to MB
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get cached complete service data with sub-services if available and fresh
|
||||||
/// Get directory size in GB with permission-aware logging
|
fn get_cached_complete_services(&self) -> Option<Vec<ServiceData>> {
|
||||||
fn get_directory_size(&self, dir: &str) -> Option<f32> {
|
if !self.should_update_cache() {
|
||||||
let output = Command::new("sudo").arg("du").arg("-sb").arg(dir).output().ok()?;
|
let state = self.state.read().unwrap();
|
||||||
|
Some(state.cached_service_data.clone())
|
||||||
if !output.status.success() {
|
|
||||||
// Log permission errors for debugging but don't spam logs
|
|
||||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
||||||
if stderr.contains("Permission denied") {
|
|
||||||
debug!("Permission denied accessing directory: {}", dir);
|
|
||||||
} else {
|
|
||||||
debug!("Failed to get size for directory {}: {}", dir, stderr);
|
|
||||||
}
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let output_str = String::from_utf8(output.stdout).ok()?;
|
|
||||||
let size_str = output_str.split_whitespace().next()?;
|
|
||||||
if let Ok(size_bytes) = size_str.parse::<u64>() {
|
|
||||||
let size_gb = size_bytes as f32 / (1024.0 * 1024.0 * 1024.0);
|
|
||||||
// Return size even if very small (minimum 0.001 GB = 1MB for visibility)
|
|
||||||
if size_gb > 0.0 {
|
|
||||||
Some(size_gb.max(0.001))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get service disk usage - simplified and configuration-driven
|
/// Get nginx sites with latency checks (internal - no caching)
|
||||||
fn get_service_disk_usage(&self, service: &str) -> Option<f32> {
|
fn get_nginx_sites_internal(&self) -> Vec<(String, f32)> {
|
||||||
// 1. Check if service has configured directories (exact match only)
|
let mut sites = Vec::new();
|
||||||
if let Some(dirs) = self.config.service_directories.get(service) {
|
|
||||||
// Service has configured paths - use the first accessible one
|
|
||||||
for dir in dirs {
|
|
||||||
if let Some(size) = self.get_directory_size(dir) {
|
|
||||||
return Some(size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If configured paths failed, return None (shows as 0)
|
|
||||||
return Some(0.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. No configured path - use systemctl WorkingDirectory
|
|
||||||
let output = Command::new("systemctl")
|
|
||||||
.arg("show")
|
|
||||||
.arg(format!("{}.service", service))
|
|
||||||
.arg("--property=WorkingDirectory")
|
|
||||||
.output()
|
|
||||||
.ok()?;
|
|
||||||
|
|
||||||
let output_str = String::from_utf8(output.stdout).ok()?;
|
|
||||||
for line in output_str.lines() {
|
|
||||||
if line.starts_with("WorkingDirectory=") && !line.contains("[not set]") {
|
|
||||||
let dir = line.trim_start_matches("WorkingDirectory=");
|
|
||||||
if !dir.is_empty() && dir != "/" {
|
|
||||||
return self.get_directory_size(dir);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#[async_trait]
|
|
||||||
impl Collector for SystemdCollector {
|
|
||||||
|
|
||||||
async fn collect(&self, _status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
|
|
||||||
let start_time = Instant::now();
|
|
||||||
debug!("Collecting systemd services metrics");
|
|
||||||
|
|
||||||
let mut metrics = Vec::new();
|
|
||||||
|
|
||||||
// Get cached services (discovery only happens when needed)
|
|
||||||
let monitored_services = match self.get_monitored_services() {
|
|
||||||
Ok(services) => services,
|
|
||||||
Err(e) => {
|
|
||||||
debug!("Failed to get monitored services: {}", e);
|
|
||||||
return Ok(metrics);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Collect individual metrics for each monitored service (status, memory, disk only)
|
|
||||||
for service in &monitored_services {
|
|
||||||
match self.get_service_status(service) {
|
|
||||||
Ok((active_status, _detailed_info)) => {
|
|
||||||
let status = self.calculate_service_status(&active_status);
|
|
||||||
|
|
||||||
// Individual service status metric
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("service_{}_status", service),
|
|
||||||
value: MetricValue::String(active_status.clone()),
|
|
||||||
unit: None,
|
|
||||||
description: Some(format!("Service {} status", service)),
|
|
||||||
status,
|
|
||||||
timestamp: chrono::Utc::now().timestamp() as u64,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Service memory usage (if available)
|
|
||||||
if let Some(memory_mb) = self.get_service_memory(service) {
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("service_{}_memory_mb", service),
|
|
||||||
value: MetricValue::Float(memory_mb),
|
|
||||||
unit: Some("MB".to_string()),
|
|
||||||
description: Some(format!("Service {} memory usage", service)),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp: chrono::Utc::now().timestamp() as u64,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Service disk usage (comprehensive detection)
|
|
||||||
if let Some(disk_gb) = self.get_service_disk_usage(service) {
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("service_{}_disk_gb", service),
|
|
||||||
value: MetricValue::Float(disk_gb),
|
|
||||||
unit: Some("GB".to_string()),
|
|
||||||
description: Some(format!("Service {} disk usage", service)),
|
|
||||||
status: Status::Ok,
|
|
||||||
timestamp: chrono::Utc::now().timestamp() as u64,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sub-service metrics for specific services
|
|
||||||
if service.contains("nginx") && active_status == "active" {
|
|
||||||
metrics.extend(self.get_nginx_site_metrics());
|
|
||||||
}
|
|
||||||
|
|
||||||
if service.contains("docker") && active_status == "active" {
|
|
||||||
metrics.extend(self.get_docker_containers());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
debug!("Failed to get status for service {}: {}", service, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let collection_time = start_time.elapsed();
|
|
||||||
debug!(
|
|
||||||
"Systemd collection completed in {:?} with {} individual service metrics",
|
|
||||||
collection_time,
|
|
||||||
metrics.len()
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(metrics)
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SystemdCollector {
|
|
||||||
/// Get nginx sites with latency checks
|
|
||||||
fn get_nginx_sites(&self) -> Vec<Metric> {
|
|
||||||
let mut metrics = Vec::new();
|
|
||||||
let timestamp = chrono::Utc::now().timestamp() as u64;
|
|
||||||
|
|
||||||
// Discover nginx sites from configuration
|
// Discover nginx sites from configuration
|
||||||
let sites = self.discover_nginx_sites();
|
let discovered_sites = self.discover_nginx_sites();
|
||||||
|
|
||||||
for (site_name, url) in &sites {
|
// Always add all discovered sites, even if checks fail (like old version)
|
||||||
|
for (site_name, url) in &discovered_sites {
|
||||||
match self.check_site_latency(url) {
|
match self.check_site_latency(url) {
|
||||||
Ok(latency_ms) => {
|
Ok(latency_ms) => {
|
||||||
let status = if latency_ms < 500.0 {
|
sites.push((site_name.clone(), latency_ms));
|
||||||
Status::Ok
|
|
||||||
} else if latency_ms < 2000.0 {
|
|
||||||
Status::Warning
|
|
||||||
} else {
|
|
||||||
Status::Critical
|
|
||||||
};
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("service_nginx_{}_latency_ms", site_name),
|
|
||||||
value: MetricValue::Float(latency_ms),
|
|
||||||
unit: Some("ms".to_string()),
|
|
||||||
description: Some(format!("Response time for {}", url)),
|
|
||||||
status,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
// Site is unreachable
|
// Site is unreachable - use -1.0 to indicate error (like old version)
|
||||||
metrics.push(Metric {
|
sites.push((site_name.clone(), -1.0));
|
||||||
name: format!("service_nginx_{}_latency_ms", site_name),
|
|
||||||
value: MetricValue::Float(-1.0), // Use -1 to indicate error
|
|
||||||
unit: Some("ms".to_string()),
|
|
||||||
description: Some(format!("Response time for {} (unreachable)", url)),
|
|
||||||
status: Status::Critical,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
metrics
|
sites
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get docker containers as sub-services
|
/// Discover nginx sites from configuration
|
||||||
fn get_docker_containers(&self) -> Vec<Metric> {
|
|
||||||
let mut metrics = Vec::new();
|
|
||||||
let timestamp = chrono::Utc::now().timestamp() as u64;
|
|
||||||
|
|
||||||
// Check if docker is available
|
|
||||||
let output = Command::new("docker")
|
|
||||||
.arg("ps")
|
|
||||||
.arg("--format")
|
|
||||||
.arg("{{.Names}},{{.Status}}")
|
|
||||||
.output();
|
|
||||||
|
|
||||||
let output = match output {
|
|
||||||
Ok(out) if out.status.success() => out,
|
|
||||||
_ => return metrics, // Docker not available or failed
|
|
||||||
};
|
|
||||||
|
|
||||||
let output_str = match String::from_utf8(output.stdout) {
|
|
||||||
Ok(s) => s,
|
|
||||||
Err(_) => return metrics,
|
|
||||||
};
|
|
||||||
|
|
||||||
for line in output_str.lines() {
|
|
||||||
if line.trim().is_empty() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let parts: Vec<&str> = line.split(',').collect();
|
|
||||||
if parts.len() >= 2 {
|
|
||||||
let container_name = parts[0].trim();
|
|
||||||
let status_str = parts[1].trim();
|
|
||||||
|
|
||||||
let status = if status_str.contains("Up") {
|
|
||||||
Status::Ok
|
|
||||||
} else if status_str.contains("Exited") {
|
|
||||||
Status::Warning
|
|
||||||
} else {
|
|
||||||
Status::Critical
|
|
||||||
};
|
|
||||||
|
|
||||||
metrics.push(Metric {
|
|
||||||
name: format!("service_docker_{}_status", container_name),
|
|
||||||
value: MetricValue::String(status_str.to_string()),
|
|
||||||
unit: None,
|
|
||||||
description: Some(format!("Docker container {} status", container_name)),
|
|
||||||
status,
|
|
||||||
timestamp,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
metrics
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check site latency using HTTP GET requests
|
|
||||||
fn check_site_latency(&self, url: &str) -> Result<f32, Box<dyn std::error::Error>> {
|
|
||||||
use std::time::Duration;
|
|
||||||
use std::time::Instant;
|
|
||||||
|
|
||||||
let start = Instant::now();
|
|
||||||
|
|
||||||
// Create HTTP client with timeouts (similar to legacy implementation)
|
|
||||||
let client = reqwest::blocking::Client::builder()
|
|
||||||
.timeout(Duration::from_secs(10))
|
|
||||||
.connect_timeout(Duration::from_secs(10))
|
|
||||||
.redirect(reqwest::redirect::Policy::limited(10))
|
|
||||||
.build()?;
|
|
||||||
|
|
||||||
// Make GET request and measure latency
|
|
||||||
let response = client.get(url).send()?;
|
|
||||||
let latency = start.elapsed().as_millis() as f32;
|
|
||||||
|
|
||||||
// Check if response is successful (2xx or 3xx status codes)
|
|
||||||
if response.status().is_success() || response.status().is_redirection() {
|
|
||||||
Ok(latency)
|
|
||||||
} else {
|
|
||||||
Err(format!(
|
|
||||||
"HTTP request failed for {} with status: {}",
|
|
||||||
url,
|
|
||||||
response.status()
|
|
||||||
)
|
|
||||||
.into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Discover nginx sites from configuration files (like the old working implementation)
|
|
||||||
fn discover_nginx_sites(&self) -> Vec<(String, String)> {
|
fn discover_nginx_sites(&self) -> Vec<(String, String)> {
|
||||||
use tracing::debug;
|
|
||||||
|
|
||||||
// Use the same approach as the old working agent: get nginx config from systemd
|
// Use the same approach as the old working agent: get nginx config from systemd
|
||||||
let config_content = match self.get_nginx_config_from_systemd() {
|
let config_content = match self.get_nginx_config_from_systemd() {
|
||||||
Some(content) => content,
|
Some(content) => content,
|
||||||
@ -662,12 +635,25 @@ impl SystemdCollector {
|
|||||||
self.parse_nginx_config_for_sites(&config_content)
|
self.parse_nginx_config_for_sites(&config_content)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fallback: get nginx config via nginx -T command
|
||||||
|
fn get_nginx_config_via_command(&self) -> Option<String> {
|
||||||
|
let output = Command::new("nginx")
|
||||||
|
.args(&["-T"])
|
||||||
|
.output()
|
||||||
|
.ok()?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
debug!("nginx -T failed");
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(String::from_utf8_lossy(&output.stdout).to_string())
|
||||||
|
}
|
||||||
|
|
||||||
/// Get nginx config from systemd service definition (NixOS compatible)
|
/// Get nginx config from systemd service definition (NixOS compatible)
|
||||||
fn get_nginx_config_from_systemd(&self) -> Option<String> {
|
fn get_nginx_config_from_systemd(&self) -> Option<String> {
|
||||||
use tracing::debug;
|
let output = Command::new("systemctl")
|
||||||
|
.args(&["show", "nginx", "--property=ExecStart", "--no-pager"])
|
||||||
let output = std::process::Command::new("systemctl")
|
|
||||||
.args(["show", "nginx", "--property=ExecStart", "--no-pager"])
|
|
||||||
.output()
|
.output()
|
||||||
.ok()?;
|
.ok()?;
|
||||||
|
|
||||||
@ -683,13 +669,9 @@ impl SystemdCollector {
|
|||||||
for line in stdout.lines() {
|
for line in stdout.lines() {
|
||||||
if line.starts_with("ExecStart=") {
|
if line.starts_with("ExecStart=") {
|
||||||
debug!("Found ExecStart line: {}", line);
|
debug!("Found ExecStart line: {}", line);
|
||||||
// Handle both traditional and NixOS systemd formats
|
|
||||||
if let Some(config_path) = self.extract_config_path_from_exec_start(line) {
|
if let Some(config_path) = self.extract_config_path_from_exec_start(line) {
|
||||||
debug!("Extracted config path: {}", config_path);
|
debug!("Extracted config path: {}", config_path);
|
||||||
// Read the config file
|
return std::fs::read_to_string(&config_path).ok();
|
||||||
return std::fs::read_to_string(&config_path)
|
|
||||||
.map_err(|e| debug!("Failed to read config file {}: {}", config_path, e))
|
|
||||||
.ok();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -699,8 +681,6 @@ impl SystemdCollector {
|
|||||||
|
|
||||||
/// Extract config path from ExecStart line
|
/// Extract config path from ExecStart line
|
||||||
fn extract_config_path_from_exec_start(&self, exec_start: &str) -> Option<String> {
|
fn extract_config_path_from_exec_start(&self, exec_start: &str) -> Option<String> {
|
||||||
use tracing::debug;
|
|
||||||
|
|
||||||
// Remove ExecStart= prefix
|
// Remove ExecStart= prefix
|
||||||
let exec_part = exec_start.strip_prefix("ExecStart=")?;
|
let exec_part = exec_start.strip_prefix("ExecStart=")?;
|
||||||
debug!("Parsing exec part: {}", exec_part);
|
debug!("Parsing exec part: {}", exec_part);
|
||||||
@ -732,26 +712,8 @@ impl SystemdCollector {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Fallback: get nginx config via nginx -T command
|
|
||||||
fn get_nginx_config_via_command(&self) -> Option<String> {
|
|
||||||
use tracing::debug;
|
|
||||||
|
|
||||||
let output = std::process::Command::new("nginx")
|
|
||||||
.args(["-T"])
|
|
||||||
.output()
|
|
||||||
.ok()?;
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
debug!("nginx -T failed");
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
Some(String::from_utf8_lossy(&output.stdout).to_string())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parse nginx config content to extract server names and build site list
|
/// Parse nginx config content to extract server names and build site list
|
||||||
fn parse_nginx_config_for_sites(&self, config_content: &str) -> Vec<(String, String)> {
|
fn parse_nginx_config_for_sites(&self, config_content: &str) -> Vec<(String, String)> {
|
||||||
use tracing::debug;
|
|
||||||
let mut sites = Vec::new();
|
let mut sites = Vec::new();
|
||||||
let lines: Vec<&str> = config_content.lines().collect();
|
let lines: Vec<&str> = config_content.lines().collect();
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
@ -775,7 +737,6 @@ impl SystemdCollector {
|
|||||||
|
|
||||||
/// Parse a server block to extract the primary server_name
|
/// Parse a server block to extract the primary server_name
|
||||||
fn parse_server_block(&self, lines: &[&str], start_index: &mut usize) -> Option<String> {
|
fn parse_server_block(&self, lines: &[&str], start_index: &mut usize) -> Option<String> {
|
||||||
use tracing::debug;
|
|
||||||
let mut server_names = Vec::new();
|
let mut server_names = Vec::new();
|
||||||
let mut has_redirect = false;
|
let mut has_redirect = false;
|
||||||
let mut i = *start_index + 1;
|
let mut i = *start_index + 1;
|
||||||
@ -806,7 +767,6 @@ impl SystemdCollector {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Check for redirects (skip redirect-only servers)
|
// Check for redirects (skip redirect-only servers)
|
||||||
if trimmed.contains("return") && (trimmed.contains("301") || trimmed.contains("302")) {
|
if trimmed.contains("return") && (trimmed.contains("301") || trimmed.contains("302")) {
|
||||||
has_redirect = true;
|
has_redirect = true;
|
||||||
@ -823,4 +783,627 @@ impl SystemdCollector {
|
|||||||
|
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check site latency using HTTP GET requests
|
||||||
|
fn check_site_latency(&self, url: &str) -> Result<f32, Box<dyn std::error::Error>> {
|
||||||
|
use std::time::Duration;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
let start = Instant::now();
|
||||||
|
|
||||||
|
// Create HTTP client with timeouts from configuration
|
||||||
|
let client = reqwest::blocking::Client::builder()
|
||||||
|
.timeout(Duration::from_secs(self.config.http_timeout_seconds))
|
||||||
|
.connect_timeout(Duration::from_secs(self.config.http_connect_timeout_seconds))
|
||||||
|
.redirect(reqwest::redirect::Policy::limited(10))
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
// Make GET request and measure latency
|
||||||
|
let response = client.get(url).send()?;
|
||||||
|
let latency = start.elapsed().as_millis() as f32;
|
||||||
|
|
||||||
|
// Check if response is successful (2xx or 3xx status codes)
|
||||||
|
if response.status().is_success() || response.status().is_redirection() {
|
||||||
|
Ok(latency)
|
||||||
|
} else {
|
||||||
|
Err(format!(
|
||||||
|
"HTTP request failed for {} with status: {}",
|
||||||
|
url,
|
||||||
|
response.status()
|
||||||
|
)
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get docker containers as sub-services
|
||||||
|
fn get_docker_containers(&self) -> Vec<(String, String)> {
|
||||||
|
let mut containers = Vec::new();
|
||||||
|
|
||||||
|
// Check if docker is available (cm-agent user is in docker group)
|
||||||
|
// Use -a to show ALL containers (running and stopped) with 3 second timeout
|
||||||
|
let output = Command::new("timeout")
|
||||||
|
.args(&["3", "docker", "ps", "-a", "--format", "{{.Names}},{{.Status}}"])
|
||||||
|
.output();
|
||||||
|
|
||||||
|
let output = match output {
|
||||||
|
Ok(out) if out.status.success() => out,
|
||||||
|
_ => return containers, // Docker not available or failed
|
||||||
|
};
|
||||||
|
|
||||||
|
let output_str = match String::from_utf8(output.stdout) {
|
||||||
|
Ok(s) => s,
|
||||||
|
Err(_) => return containers,
|
||||||
|
};
|
||||||
|
|
||||||
|
for line in output_str.lines() {
|
||||||
|
if line.trim().is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let parts: Vec<&str> = line.split(',').collect();
|
||||||
|
if parts.len() >= 2 {
|
||||||
|
let container_name = parts[0].trim();
|
||||||
|
let status_str = parts[1].trim();
|
||||||
|
|
||||||
|
let container_status = if status_str.contains("Up") {
|
||||||
|
"active"
|
||||||
|
} else if status_str.contains("Exited") || status_str.contains("Created") {
|
||||||
|
"inactive" // Stopped/created containers are inactive
|
||||||
|
} else {
|
||||||
|
"failed" // Other states (restarting, paused, dead) → failed
|
||||||
|
};
|
||||||
|
|
||||||
|
containers.push((format!("docker_{}", container_name), container_status.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
containers
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get docker images as sub-services
|
||||||
|
fn get_docker_images(&self) -> Vec<(String, String, f32)> {
|
||||||
|
let mut images = Vec::new();
|
||||||
|
// Check if docker is available (cm-agent user is in docker group) with 3 second timeout
|
||||||
|
let output = Command::new("timeout")
|
||||||
|
.args(&["3", "docker", "images", "--format", "{{.Repository}}:{{.Tag}},{{.Size}}"])
|
||||||
|
.output();
|
||||||
|
|
||||||
|
let output = match output {
|
||||||
|
Ok(out) if out.status.success() => out,
|
||||||
|
Ok(_) => {
|
||||||
|
return images;
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
return images;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let output_str = match String::from_utf8(output.stdout) {
|
||||||
|
Ok(s) => s,
|
||||||
|
Err(_) => return images,
|
||||||
|
};
|
||||||
|
|
||||||
|
for line in output_str.lines() {
|
||||||
|
if line.trim().is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let parts: Vec<&str> = line.split(',').collect();
|
||||||
|
if parts.len() >= 2 {
|
||||||
|
let image_name = parts[0].trim();
|
||||||
|
let size_str = parts[1].trim();
|
||||||
|
|
||||||
|
// Skip <none>:<none> images (dangling images)
|
||||||
|
if image_name.contains("<none>") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse size to MB (sizes come as "142MB", "1.5GB", "512kB", etc.)
|
||||||
|
let size_mb = self.parse_docker_size(size_str);
|
||||||
|
|
||||||
|
images.push((
|
||||||
|
image_name.to_string(),
|
||||||
|
"inactive".to_string(), // Images are informational - use inactive for neutral display
|
||||||
|
size_mb
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
images
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse Docker size string to MB
|
||||||
|
fn parse_docker_size(&self, size_str: &str) -> f32 {
|
||||||
|
let size_upper = size_str.to_uppercase();
|
||||||
|
|
||||||
|
// Extract numeric part and unit
|
||||||
|
let mut num_str = String::new();
|
||||||
|
let mut unit = String::new();
|
||||||
|
|
||||||
|
for ch in size_upper.chars() {
|
||||||
|
if ch.is_ascii_digit() || ch == '.' {
|
||||||
|
num_str.push(ch);
|
||||||
|
} else if ch.is_alphabetic() {
|
||||||
|
unit.push(ch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let value: f32 = num_str.parse().unwrap_or(0.0);
|
||||||
|
|
||||||
|
// Convert to MB
|
||||||
|
match unit.as_str() {
|
||||||
|
"KB" | "K" => value / 1024.0,
|
||||||
|
"MB" | "M" => value,
|
||||||
|
"GB" | "G" => value * 1024.0,
|
||||||
|
"TB" | "T" => value * 1024.0 * 1024.0,
|
||||||
|
_ => value, // Assume bytes if no unit
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get VPN external IP by querying through the vpn namespace
|
||||||
|
fn get_vpn_external_ip(&self) -> Option<String> {
|
||||||
|
let output = Command::new("timeout")
|
||||||
|
.args(&[
|
||||||
|
"5",
|
||||||
|
"sudo",
|
||||||
|
"ip",
|
||||||
|
"netns",
|
||||||
|
"exec",
|
||||||
|
"vpn",
|
||||||
|
"curl",
|
||||||
|
"-s",
|
||||||
|
"--max-time",
|
||||||
|
"4",
|
||||||
|
"https://ifconfig.me"
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.ok()?;
|
||||||
|
|
||||||
|
if output.status.success() {
|
||||||
|
let ip = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
||||||
|
if !ip.is_empty() && ip.contains('.') {
|
||||||
|
return Some(ip);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get Tailscale connected peers with their connection methods
|
||||||
|
/// Returns a list of (device_name, connection_method) tuples
|
||||||
|
fn get_tailscale_peers(&self) -> Vec<(String, String)> {
|
||||||
|
match Command::new("timeout")
|
||||||
|
.args(["2", "tailscale", "status"])
|
||||||
|
.output()
|
||||||
|
{
|
||||||
|
Ok(output) if output.status.success() => {
|
||||||
|
let status_output = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let mut peers = Vec::new();
|
||||||
|
|
||||||
|
// Get current hostname to filter it out
|
||||||
|
let current_hostname = gethostname::gethostname()
|
||||||
|
.to_string_lossy()
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
// Parse tailscale status output
|
||||||
|
// Format: IP hostname user os status
|
||||||
|
// Example: 100.110.98.3 wslbox cm@ linux active; direct 192.168.30.227:53757
|
||||||
|
// Note: First line is always the current host, skip it
|
||||||
|
for (idx, line) in status_output.lines().enumerate() {
|
||||||
|
if idx == 0 {
|
||||||
|
continue; // Skip first line (current host)
|
||||||
|
}
|
||||||
|
|
||||||
|
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||||
|
if parts.len() < 5 {
|
||||||
|
continue; // Skip invalid lines
|
||||||
|
}
|
||||||
|
|
||||||
|
// parts[0] = IP
|
||||||
|
// parts[1] = hostname
|
||||||
|
// parts[2] = user
|
||||||
|
// parts[3] = OS
|
||||||
|
// parts[4+] = status (e.g., "active;", "direct", "192.168.30.227:53757" or "idle;" or "offline")
|
||||||
|
|
||||||
|
let hostname = parts[1];
|
||||||
|
|
||||||
|
// Skip if this is the current host (double-check in case format changes)
|
||||||
|
if hostname == current_hostname {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let status_parts = &parts[4..];
|
||||||
|
|
||||||
|
// Determine connection method from status
|
||||||
|
let connection_method = if status_parts.is_empty() {
|
||||||
|
continue; // Skip if no status
|
||||||
|
} else {
|
||||||
|
let status_str = status_parts.join(" ");
|
||||||
|
if status_str.contains("offline") {
|
||||||
|
continue; // Skip offline peers
|
||||||
|
} else if status_str.contains("direct") {
|
||||||
|
"direct"
|
||||||
|
} else if status_str.contains("relay") {
|
||||||
|
"relay"
|
||||||
|
} else if status_str.contains("idle") {
|
||||||
|
"idle"
|
||||||
|
} else if status_str.contains("active") {
|
||||||
|
"active"
|
||||||
|
} else {
|
||||||
|
continue; // Skip unknown status
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
peers.push((hostname.to_string(), connection_method.to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
peers
|
||||||
|
}
|
||||||
|
_ => Vec::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get NFS exports from exportfs
|
||||||
|
/// Returns a list of (export_path, info_string) tuples
|
||||||
|
fn get_nfs_exports(&self) -> Vec<(String, String)> {
|
||||||
|
let output = match Command::new("timeout")
|
||||||
|
.args(["2", "exportfs", "-v"])
|
||||||
|
.output()
|
||||||
|
{
|
||||||
|
Ok(output) if output.status.success() => output,
|
||||||
|
_ => return Vec::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let exports_output = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let mut exports_map: std::collections::HashMap<String, Vec<(String, String)>> =
|
||||||
|
std::collections::HashMap::new();
|
||||||
|
let mut current_path: Option<String> = None;
|
||||||
|
|
||||||
|
for line in exports_output.lines() {
|
||||||
|
let trimmed = line.trim();
|
||||||
|
|
||||||
|
if trimmed.is_empty() || trimmed.starts_with('#') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if trimmed.starts_with('/') {
|
||||||
|
// Export path line - may have network on same line or continuation
|
||||||
|
let parts: Vec<&str> = trimmed.splitn(2, char::is_whitespace).collect();
|
||||||
|
let path = parts[0].to_string();
|
||||||
|
current_path = Some(path.clone());
|
||||||
|
|
||||||
|
// Check if network info is on the same line
|
||||||
|
if parts.len() > 1 {
|
||||||
|
let rest = parts[1].trim();
|
||||||
|
if let Some(paren_pos) = rest.find('(') {
|
||||||
|
let network = rest[..paren_pos].trim();
|
||||||
|
|
||||||
|
if let Some(end_paren) = rest.find(')') {
|
||||||
|
let options = &rest[paren_pos+1..end_paren];
|
||||||
|
let mode = if options.contains(",rw,") || options.ends_with(",rw") {
|
||||||
|
"rw"
|
||||||
|
} else {
|
||||||
|
"ro"
|
||||||
|
};
|
||||||
|
|
||||||
|
exports_map.entry(path)
|
||||||
|
.or_insert_with(Vec::new)
|
||||||
|
.push((network.to_string(), mode.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if let Some(ref path) = current_path {
|
||||||
|
// Continuation line with network and options
|
||||||
|
if let Some(paren_pos) = trimmed.find('(') {
|
||||||
|
let network = trimmed[..paren_pos].trim();
|
||||||
|
|
||||||
|
if let Some(end_paren) = trimmed.find(')') {
|
||||||
|
let options = &trimmed[paren_pos+1..end_paren];
|
||||||
|
let mode = if options.contains(",rw,") || options.ends_with(",rw") {
|
||||||
|
"rw"
|
||||||
|
} else {
|
||||||
|
"ro"
|
||||||
|
};
|
||||||
|
|
||||||
|
exports_map.entry(path.clone())
|
||||||
|
.or_insert_with(Vec::new)
|
||||||
|
.push((network.to_string(), mode.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build display strings: "path: mode [networks]"
|
||||||
|
let mut exports: Vec<(String, String)> = exports_map
|
||||||
|
.into_iter()
|
||||||
|
.map(|(path, mut entries)| {
|
||||||
|
if entries.is_empty() {
|
||||||
|
return (path, String::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mode = entries[0].1.clone();
|
||||||
|
let networks: Vec<String> = entries.drain(..).map(|(n, _)| n).collect();
|
||||||
|
let info = format!("{} [{}]", mode, networks.join(", "));
|
||||||
|
(path, info)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
exports.sort_by(|a, b| a.0.cmp(&b.0));
|
||||||
|
exports
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get SMB shares from smb.conf
|
||||||
|
/// Returns a list of (share_name, share_path) tuples
|
||||||
|
fn get_smb_shares(&self) -> Vec<(String, String)> {
|
||||||
|
match std::fs::read_to_string("/etc/samba/smb.conf") {
|
||||||
|
Ok(config) => {
|
||||||
|
let mut shares = Vec::new();
|
||||||
|
let mut current_share: Option<String> = None;
|
||||||
|
let mut current_path: Option<String> = None;
|
||||||
|
|
||||||
|
for line in config.lines() {
|
||||||
|
let line = line.trim();
|
||||||
|
|
||||||
|
// Skip comments and empty lines
|
||||||
|
if line.is_empty() || line.starts_with('#') || line.starts_with(';') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect share section [sharename]
|
||||||
|
if line.starts_with('[') && line.ends_with(']') {
|
||||||
|
// Save previous share if we have both name and path
|
||||||
|
if let (Some(name), Some(path)) = (current_share.take(), current_path.take()) {
|
||||||
|
// Skip special sections
|
||||||
|
if name != "global" && name != "homes" && name != "printers" {
|
||||||
|
shares.push((name, path));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start new share
|
||||||
|
let share_name = line[1..line.len()-1].trim().to_string();
|
||||||
|
current_share = Some(share_name);
|
||||||
|
current_path = None;
|
||||||
|
}
|
||||||
|
// Look for path = /some/path
|
||||||
|
else if line.starts_with("path") && line.contains('=') {
|
||||||
|
if let Some(path_value) = line.split('=').nth(1) {
|
||||||
|
current_path = Some(path_value.trim().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't forget the last share
|
||||||
|
if let (Some(name), Some(path)) = (current_share, current_path) {
|
||||||
|
if name != "global" && name != "homes" && name != "printers" {
|
||||||
|
shares.push((name, path));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
shares
|
||||||
|
}
|
||||||
|
_ => Vec::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get nftables open ports grouped by protocol
|
||||||
|
/// Returns: (tcp_ports_string, udp_ports_string)
|
||||||
|
fn get_nftables_open_ports(&self) -> (String, String) {
|
||||||
|
let output = Command::new("sudo")
|
||||||
|
.args(&["/run/current-system/sw/bin/nft", "list", "ruleset"])
|
||||||
|
.output();
|
||||||
|
|
||||||
|
let output = match output {
|
||||||
|
Ok(out) if out.status.success() => out,
|
||||||
|
Ok(out) => {
|
||||||
|
info!("nft command failed with status: {:?}, stderr: {}",
|
||||||
|
out.status, String::from_utf8_lossy(&out.stderr));
|
||||||
|
return (String::new(), String::new());
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
info!("Failed to execute nft command: {}", e);
|
||||||
|
return (String::new(), String::new());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let output_str = match String::from_utf8(output.stdout) {
|
||||||
|
Ok(s) => s,
|
||||||
|
Err(_) => {
|
||||||
|
info!("Failed to parse nft output as UTF-8");
|
||||||
|
return (String::new(), String::new());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut tcp_ports = std::collections::HashSet::new();
|
||||||
|
let mut udp_ports = std::collections::HashSet::new();
|
||||||
|
|
||||||
|
// Parse nftables output for WAN incoming accept rules with dport
|
||||||
|
// Looking for patterns like: tcp dport 22 accept or tcp dport { 22, 80, 443 } accept
|
||||||
|
// Only include rules in input_wan chain
|
||||||
|
let mut in_wan_chain = false;
|
||||||
|
|
||||||
|
for line in output_str.lines() {
|
||||||
|
let line = line.trim();
|
||||||
|
|
||||||
|
// Track if we're in the input_wan chain
|
||||||
|
if line.contains("chain input_wan") {
|
||||||
|
in_wan_chain = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset when exiting chain (closing brace) or entering other chains
|
||||||
|
if line == "}" || (line.starts_with("chain ") && !line.contains("input_wan")) {
|
||||||
|
in_wan_chain = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only process rules in input_wan chain
|
||||||
|
if !in_wan_chain {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip if not an accept rule
|
||||||
|
if !line.contains("accept") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse TCP ports
|
||||||
|
if line.contains("tcp dport") {
|
||||||
|
for port in self.extract_ports_from_nft_rule(line) {
|
||||||
|
tcp_ports.insert(port);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse UDP ports
|
||||||
|
if line.contains("udp dport") {
|
||||||
|
for port in self.extract_ports_from_nft_rule(line) {
|
||||||
|
udp_ports.insert(port);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort and format
|
||||||
|
let mut tcp_vec: Vec<u16> = tcp_ports.into_iter().collect();
|
||||||
|
let mut udp_vec: Vec<u16> = udp_ports.into_iter().collect();
|
||||||
|
tcp_vec.sort();
|
||||||
|
udp_vec.sort();
|
||||||
|
|
||||||
|
let tcp_str = tcp_vec.iter().map(|p| p.to_string()).collect::<Vec<_>>().join(", ");
|
||||||
|
let udp_str = udp_vec.iter().map(|p| p.to_string()).collect::<Vec<_>>().join(", ");
|
||||||
|
|
||||||
|
info!("nftables WAN ports - TCP: '{}', UDP: '{}'", tcp_str, udp_str);
|
||||||
|
|
||||||
|
(tcp_str, udp_str)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract port numbers from nftables rule line
|
||||||
|
/// Returns vector of ports (handles both single ports and sets)
|
||||||
|
fn extract_ports_from_nft_rule(&self, line: &str) -> Vec<u16> {
|
||||||
|
let mut ports = Vec::new();
|
||||||
|
|
||||||
|
// Pattern: "tcp dport 22 accept" or "tcp dport { 22, 80, 443 } accept"
|
||||||
|
if let Some(dport_pos) = line.find("dport") {
|
||||||
|
let after_dport = &line[dport_pos + 5..].trim();
|
||||||
|
|
||||||
|
// Handle port sets like { 22, 80, 443 }
|
||||||
|
if after_dport.starts_with('{') {
|
||||||
|
if let Some(end_brace) = after_dport.find('}') {
|
||||||
|
let ports_str = &after_dport[1..end_brace];
|
||||||
|
// Parse each port in the set
|
||||||
|
for port_str in ports_str.split(',') {
|
||||||
|
if let Ok(port) = port_str.trim().parse::<u16>() {
|
||||||
|
ports.push(port);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Single port
|
||||||
|
if let Some(port_str) = after_dport.split_whitespace().next() {
|
||||||
|
if let Ok(port) = port_str.parse::<u16>() {
|
||||||
|
ports.push(port);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ports
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get aggregate qBittorrent torrent statistics
|
||||||
|
/// Returns: (active_count, download_mbps, upload_mbps)
|
||||||
|
fn get_qbittorrent_stats(&self) -> Option<(u32, f32, f32)> {
|
||||||
|
// Query qBittorrent API through VPN namespace
|
||||||
|
let output = Command::new("timeout")
|
||||||
|
.args(&[
|
||||||
|
"5",
|
||||||
|
"sudo",
|
||||||
|
"ip",
|
||||||
|
"netns",
|
||||||
|
"exec",
|
||||||
|
"vpn",
|
||||||
|
"curl",
|
||||||
|
"-s",
|
||||||
|
"--max-time",
|
||||||
|
"4",
|
||||||
|
"http://localhost:8080/api/v2/torrents/info"
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.ok()?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let torrents: Vec<serde_json::Value> = serde_json::from_str(&output_str).ok()?;
|
||||||
|
|
||||||
|
let mut active_count = 0u32;
|
||||||
|
let mut total_download_bps = 0.0f64;
|
||||||
|
let mut total_upload_bps = 0.0f64;
|
||||||
|
|
||||||
|
for torrent in torrents {
|
||||||
|
let state = torrent["state"].as_str().unwrap_or("");
|
||||||
|
let dlspeed = torrent["dlspeed"].as_f64().unwrap_or(0.0);
|
||||||
|
let upspeed = torrent["upspeed"].as_f64().unwrap_or(0.0);
|
||||||
|
|
||||||
|
// States: downloading, uploading, stalledDL, stalledUP, queuedDL, queuedUP, pausedDL, pausedUP
|
||||||
|
// Count as active if downloading or uploading (seeding)
|
||||||
|
if state.contains("downloading") || state.contains("uploading") ||
|
||||||
|
state == "stalledDL" || state == "stalledUP" {
|
||||||
|
active_count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
total_download_bps += dlspeed;
|
||||||
|
total_upload_bps += upspeed;
|
||||||
|
}
|
||||||
|
|
||||||
|
// qBittorrent returns bytes/s, convert to MB/s
|
||||||
|
let download_mbps = (total_download_bps / 1024.0 / 1024.0) as f32;
|
||||||
|
let upload_mbps = (total_upload_bps / 1024.0 / 1024.0) as f32;
|
||||||
|
|
||||||
|
Some((active_count, download_mbps, upload_mbps))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check for active torrent copy operations
|
||||||
|
/// Returns: Vec of filenames currently being copied
|
||||||
|
fn get_active_torrent_copies(&self) -> Vec<String> {
|
||||||
|
let marker_dir = "/tmp/torrent-copy";
|
||||||
|
let mut active_copies = Vec::new();
|
||||||
|
|
||||||
|
// Read all marker files from directory
|
||||||
|
if let Ok(entries) = std::fs::read_dir(marker_dir) {
|
||||||
|
for entry in entries.flatten() {
|
||||||
|
if let Ok(file_type) = entry.file_type() {
|
||||||
|
if file_type.is_file() {
|
||||||
|
// Filename is the marker (sanitized torrent name)
|
||||||
|
if let Some(filename) = entry.file_name().to_str() {
|
||||||
|
// Convert sanitized name back (replace _ with /)
|
||||||
|
let display_name = filename.replace('_', "/");
|
||||||
|
active_copies.push(display_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
active_copies
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Collector for SystemdCollector {
|
||||||
|
async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
|
// Clear services to prevent duplicates when updating cached data
|
||||||
|
agent_data.services.clear();
|
||||||
|
|
||||||
|
// Use cached complete data if available and fresh
|
||||||
|
if let Some(cached_complete_services) = self.get_cached_complete_services() {
|
||||||
|
for service_data in cached_complete_services {
|
||||||
|
agent_data.services.push(service_data);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
// Collect fresh data
|
||||||
|
self.collect_service_data(agent_data).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
403
agent/src/collectors/systemd_old.rs
Normal file
403
agent/src/collectors/systemd_old.rs
Normal file
@ -0,0 +1,403 @@
|
|||||||
|
use anyhow::Result;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use cm_dashboard_shared::{AgentData, ServiceData, Status};
|
||||||
|
use std::process::Command;
|
||||||
|
use std::sync::RwLock;
|
||||||
|
use std::time::Instant;
|
||||||
|
use tracing::debug;
|
||||||
|
|
||||||
|
use super::{Collector, CollectorError};
|
||||||
|
use crate::config::SystemdConfig;
|
||||||
|
|
||||||
|
/// Systemd collector for monitoring systemd services with structured data output
pub struct SystemdCollector {
    /// Cached state with thread-safe interior mutability
    /// (RwLock lets concurrent readers share the cache; a writer takes the
    /// lock briefly when refreshing it).
    state: RwLock<ServiceCacheState>,
    /// Configuration for service monitoring
    /// (filters, thresholds, per-service directories, intervals).
    config: SystemdConfig,
}
|
||||||
|
|
||||||
|
/// Internal state for service caching
///
/// Kept behind an `RwLock` inside `SystemdCollector`; all timestamps use
/// monotonic `Instant`s so elapsed-time checks are immune to clock changes.
#[derive(Debug, Clone)]
struct ServiceCacheState {
    /// Last collection time for performance tracking
    /// (`None` until the first collection has completed).
    last_collection: Option<Instant>,
    /// Cached service data
    services: Vec<ServiceInfo>,
    /// Interesting services to monitor (cached after discovery)
    monitored_services: Vec<String>,
    /// Cached service status information from discovery
    /// (keyed by service name, without the ".service" suffix).
    service_status_cache: std::collections::HashMap<String, ServiceStatusInfo>,
    /// Last time services were discovered
    last_discovery_time: Option<Instant>,
    /// How often to rediscover services (from config)
    discovery_interval_seconds: u64,
}
|
||||||
|
|
||||||
|
/// Cached service status information from systemctl list-units
///
/// Mirrors the LOAD / ACTIVE / SUB columns printed by
/// `systemctl list-units --type=service --plain`.
#[derive(Debug, Clone)]
struct ServiceStatusInfo {
    // e.g. "loaded", "not-found", "masked"
    load_state: String,
    // e.g. "active", "inactive", "failed", "activating"
    active_state: String,
    // e.g. "running", "dead", "exited"
    sub_state: String,
}
|
||||||
|
|
||||||
|
/// Internal service information
///
/// Collector-private record; converted to the shared `ServiceData` wire
/// type when populating `AgentData`.
#[derive(Debug, Clone)]
struct ServiceInfo {
    // Service unit name without the ".service" suffix
    name: String,
    status: String, // "active", "inactive", "failed", etc.
    memory_mb: f32, // Memory usage in MB
    disk_gb: f32,   // Disk usage in GB (usually 0 for services)
}
|
||||||
|
|
||||||
|
impl SystemdCollector {
|
||||||
|
pub fn new(config: SystemdConfig) -> Self {
|
||||||
|
let state = ServiceCacheState {
|
||||||
|
last_collection: None,
|
||||||
|
services: Vec::new(),
|
||||||
|
monitored_services: Vec::new(),
|
||||||
|
service_status_cache: std::collections::HashMap::new(),
|
||||||
|
last_discovery_time: None,
|
||||||
|
discovery_interval_seconds: config.interval_seconds,
|
||||||
|
};
|
||||||
|
|
||||||
|
Self {
|
||||||
|
state: RwLock::new(state),
|
||||||
|
config,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collect service data and populate AgentData
|
||||||
|
async fn collect_service_data(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
|
let start_time = Instant::now();
|
||||||
|
debug!("Collecting systemd services metrics");
|
||||||
|
|
||||||
|
// Get cached services (discovery only happens when needed)
|
||||||
|
let monitored_services = match self.get_monitored_services() {
|
||||||
|
Ok(services) => services,
|
||||||
|
Err(e) => {
|
||||||
|
debug!("Failed to get monitored services: {}", e);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Collect service data for each monitored service
|
||||||
|
let mut services = Vec::new();
|
||||||
|
for service_name in &monitored_services {
|
||||||
|
match self.get_service_status(service_name) {
|
||||||
|
Ok((active_status, _detailed_info)) => {
|
||||||
|
let memory_mb = self.get_service_memory_usage(service_name).await.unwrap_or(0.0);
|
||||||
|
let disk_gb = self.get_service_disk_usage(service_name).await.unwrap_or(0.0);
|
||||||
|
|
||||||
|
let service_info = ServiceInfo {
|
||||||
|
name: service_name.clone(),
|
||||||
|
status: active_status,
|
||||||
|
memory_mb,
|
||||||
|
disk_gb,
|
||||||
|
};
|
||||||
|
services.push(service_info);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
debug!("Failed to get status for service {}: {}", service_name, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update cached state
|
||||||
|
{
|
||||||
|
let mut state = self.state.write().unwrap();
|
||||||
|
state.last_collection = Some(start_time);
|
||||||
|
state.services = services.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Populate AgentData with service information
|
||||||
|
for service in services {
|
||||||
|
agent_data.services.push(ServiceData {
|
||||||
|
name: service.name.clone(),
|
||||||
|
status: service.status.clone(),
|
||||||
|
memory_mb: service.memory_mb,
|
||||||
|
disk_gb: service.disk_gb,
|
||||||
|
user_stopped: false, // TODO: Integrate with service tracker
|
||||||
|
service_status: self.calculate_service_status(&service.name, &service.status),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let elapsed = start_time.elapsed();
|
||||||
|
debug!("Systemd collection completed in {:?} with {} services", elapsed, agent_data.services.len());
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get systemd services information
///
/// Enumerates every installed service unit file (so inactive/never-started
/// services are included), joins it with runtime state from
/// `systemctl list-units --all`, fills in a default "inactive/dead" state
/// for units with no runtime entry, and returns one `ServiceInfo` per
/// service that passes the configured filters.
async fn get_systemd_services(&self) -> Result<Vec<ServiceInfo>, CollectorError> {
    let mut services = Vec::new();

    // Get ALL service unit files (includes inactive services)
    let unit_files_output = Command::new("systemctl")
        .args(&["list-unit-files", "--type=service", "--no-pager", "--plain"])
        .output()
        .map_err(|e| CollectorError::SystemRead {
            path: "systemctl list-unit-files".to_string(),
            error: e.to_string(),
        })?;

    // Get runtime status of ALL units (including inactive)
    let status_output = Command::new("systemctl")
        .args(&["list-units", "--type=service", "--all", "--no-pager", "--plain"])
        .output()
        .map_err(|e| CollectorError::SystemRead {
            path: "systemctl list-units --all".to_string(),
            error: e.to_string(),
        })?;

    let unit_files_str = String::from_utf8_lossy(&unit_files_output.stdout);
    let status_str = String::from_utf8_lossy(&status_output.stdout);

    // Parse all service unit files to get complete service list.
    // Column layout is "UNIT STATE ..."; only the unit name is needed here.
    let mut all_service_names = std::collections::HashSet::new();
    for line in unit_files_str.lines() {
        let fields: Vec<&str> = line.split_whitespace().collect();
        if fields.len() >= 2 && fields[0].ends_with(".service") {
            // Service names are stored without the ".service" suffix.
            let service_name = fields[0].trim_end_matches(".service");
            all_service_names.insert(service_name.to_string());
        }
    }

    // Parse runtime status for all units.
    // Column layout from `list-units --plain`: UNIT LOAD ACTIVE SUB ...
    let mut status_cache = std::collections::HashMap::new();
    for line in status_str.lines() {
        let fields: Vec<&str> = line.split_whitespace().collect();
        if fields.len() >= 4 && fields[0].ends_with(".service") {
            let service_name = fields[0].trim_end_matches(".service");
            let load_state = fields.get(1).unwrap_or(&"unknown").to_string();
            let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
            let sub_state = fields.get(3).unwrap_or(&"unknown").to_string();
            status_cache.insert(service_name.to_string(), (load_state, active_state, sub_state));
        }
    }

    // For services found in unit files but not in runtime status, set default inactive status
    for service_name in &all_service_names {
        if !status_cache.contains_key(service_name) {
            status_cache.insert(service_name.to_string(), (
                "not-loaded".to_string(),
                "inactive".to_string(),
                "dead".to_string()
            ));
        }
    }

    // Process all discovered services and apply filters.
    // Resource usage is queried per service (one `systemctl show` / `du`
    // invocation each), so this loop dominates collection time.
    for service_name in &all_service_names {
        if self.should_monitor_service(service_name) {
            if let Some((load_state, active_state, sub_state)) = status_cache.get(service_name) {
                let memory_mb = self.get_service_memory_usage(service_name).await.unwrap_or(0.0);
                let disk_gb = self.get_service_disk_usage(service_name).await.unwrap_or(0.0);

                // Collapse (active_state, sub_state) into one canonical word.
                let normalized_status = self.normalize_service_status(active_state, sub_state);
                let service_info = ServiceInfo {
                    name: service_name.to_string(),
                    status: normalized_status,
                    memory_mb,
                    disk_gb,
                };

                services.push(service_info);
            }
        }
    }

    Ok(services)
}
|
||||||
|
|
||||||
|
/// Check if a service should be monitored based on configuration filters with wildcard support
|
||||||
|
fn should_monitor_service(&self, service_name: &str) -> bool {
|
||||||
|
// If no filters configured, monitor nothing (to prevent noise)
|
||||||
|
if self.config.service_name_filters.is_empty() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if service matches any of the configured patterns
|
||||||
|
for pattern in &self.config.service_name_filters {
|
||||||
|
if self.matches_pattern(service_name, pattern) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if service name matches pattern (supports wildcards like nginx*)
|
||||||
|
fn matches_pattern(&self, service_name: &str, pattern: &str) -> bool {
|
||||||
|
if pattern.ends_with('*') {
|
||||||
|
let prefix = &pattern[..pattern.len() - 1];
|
||||||
|
service_name.starts_with(prefix)
|
||||||
|
} else {
|
||||||
|
service_name == pattern
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get disk usage for a specific service
|
||||||
|
async fn get_service_disk_usage(&self, service_name: &str) -> Result<f32, CollectorError> {
|
||||||
|
// Check if this service has configured directory paths
|
||||||
|
if let Some(dirs) = self.config.service_directories.get(service_name) {
|
||||||
|
// Service has configured paths - use the first accessible one
|
||||||
|
for dir in dirs {
|
||||||
|
if let Some(size) = self.get_directory_size(dir) {
|
||||||
|
return Ok(size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If configured paths failed, return 0
|
||||||
|
return Ok(0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// No configured path - try to get WorkingDirectory from systemctl
|
||||||
|
let output = Command::new("systemctl")
|
||||||
|
.args(&["show", &format!("{}.service", service_name), "--property=WorkingDirectory"])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| CollectorError::SystemRead {
|
||||||
|
path: format!("WorkingDirectory for {}", service_name),
|
||||||
|
error: e.to_string(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||||
|
for line in output_str.lines() {
|
||||||
|
if line.starts_with("WorkingDirectory=") && !line.contains("[not set]") {
|
||||||
|
let dir = line.strip_prefix("WorkingDirectory=").unwrap_or("");
|
||||||
|
if !dir.is_empty() {
|
||||||
|
return Ok(self.get_directory_size(dir).unwrap_or(0.0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(0.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get size of a directory in GB
|
||||||
|
fn get_directory_size(&self, path: &str) -> Option<f32> {
|
||||||
|
let output = Command::new("du")
|
||||||
|
.args(&["-sb", path])
|
||||||
|
.output()
|
||||||
|
.ok()?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let parts: Vec<&str> = output_str.split_whitespace().collect();
|
||||||
|
if let Some(size_str) = parts.first() {
|
||||||
|
if let Ok(size_bytes) = size_str.parse::<u64>() {
|
||||||
|
return Some(size_bytes as f32 / (1024.0 * 1024.0 * 1024.0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate service status, taking user-stopped services into account
|
||||||
|
fn calculate_service_status(&self, service_name: &str, active_status: &str) -> Status {
|
||||||
|
match active_status.to_lowercase().as_str() {
|
||||||
|
"active" => Status::Ok,
|
||||||
|
"inactive" | "dead" => {
|
||||||
|
debug!("Service '{}' is inactive - treating as Inactive status", service_name);
|
||||||
|
Status::Inactive
|
||||||
|
},
|
||||||
|
"failed" | "error" => Status::Critical,
|
||||||
|
"activating" | "deactivating" | "reloading" | "starting" | "stopping" => {
|
||||||
|
debug!("Service '{}' is transitioning - treating as Pending", service_name);
|
||||||
|
Status::Pending
|
||||||
|
},
|
||||||
|
_ => Status::Unknown,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get memory usage for a specific service
|
||||||
|
async fn get_service_memory_usage(&self, service_name: &str) -> Result<f32, CollectorError> {
|
||||||
|
let output = Command::new("systemctl")
|
||||||
|
.args(&["show", &format!("{}.service", service_name), "--property=MemoryCurrent"])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| CollectorError::SystemRead {
|
||||||
|
path: format!("memory usage for {}", service_name),
|
||||||
|
error: e.to_string(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||||
|
|
||||||
|
for line in output_str.lines() {
|
||||||
|
if line.starts_with("MemoryCurrent=") {
|
||||||
|
if let Some(mem_str) = line.strip_prefix("MemoryCurrent=") {
|
||||||
|
if mem_str != "[not set]" {
|
||||||
|
if let Ok(memory_bytes) = mem_str.parse::<u64>() {
|
||||||
|
return Ok(memory_bytes as f32 / (1024.0 * 1024.0)); // Convert to MB
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(0.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Normalize service status to standard values
|
||||||
|
fn normalize_service_status(&self, active_state: &str, sub_state: &str) -> String {
|
||||||
|
match (active_state, sub_state) {
|
||||||
|
("active", "running") => "active".to_string(),
|
||||||
|
("active", _) => "active".to_string(),
|
||||||
|
("inactive", "dead") => "inactive".to_string(),
|
||||||
|
("inactive", _) => "inactive".to_string(),
|
||||||
|
("failed", _) => "failed".to_string(),
|
||||||
|
("activating", _) => "starting".to_string(),
|
||||||
|
("deactivating", _) => "stopping".to_string(),
|
||||||
|
_ => format!("{}:{}", active_state, sub_state),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if service collection cache should be updated
|
||||||
|
fn should_update_cache(&self) -> bool {
|
||||||
|
let state = self.state.read().unwrap();
|
||||||
|
|
||||||
|
match state.last_collection {
|
||||||
|
None => true,
|
||||||
|
Some(last) => {
|
||||||
|
let cache_duration = std::time::Duration::from_secs(30);
|
||||||
|
last.elapsed() > cache_duration
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get cached service data if available and fresh
|
||||||
|
fn get_cached_services(&self) -> Option<Vec<ServiceInfo>> {
|
||||||
|
if !self.should_update_cache() {
|
||||||
|
let state = self.state.read().unwrap();
|
||||||
|
Some(state.services.clone())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Collector for SystemdCollector {
|
||||||
|
async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||||
|
// Use cached data if available and fresh
|
||||||
|
if let Some(cached_services) = self.get_cached_services() {
|
||||||
|
debug!("Using cached systemd services data");
|
||||||
|
for service in cached_services {
|
||||||
|
agent_data.services.push(ServiceData {
|
||||||
|
name: service.name.clone(),
|
||||||
|
status: service.status.clone(),
|
||||||
|
memory_mb: service.memory_mb,
|
||||||
|
disk_gb: service.disk_gb,
|
||||||
|
user_stopped: false, // TODO: Integrate with service tracker
|
||||||
|
service_status: self.calculate_service_status(&service.name, &service.status),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
// Collect fresh data
|
||||||
|
self.collect_service_data(agent_data).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,14 +1,13 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use cm_dashboard_shared::{MessageEnvelope, MetricMessage};
|
use cm_dashboard_shared::{AgentData, MessageEnvelope};
|
||||||
use tracing::{debug, info};
|
use tracing::{debug, info};
|
||||||
use zmq::{Context, Socket, SocketType};
|
use zmq::{Context, Socket, SocketType};
|
||||||
|
|
||||||
use crate::config::ZmqConfig;
|
use crate::config::ZmqConfig;
|
||||||
|
|
||||||
/// ZMQ communication handler for publishing metrics and receiving commands
|
/// ZMQ communication handler for publishing metrics
|
||||||
pub struct ZmqHandler {
|
pub struct ZmqHandler {
|
||||||
publisher: Socket,
|
publisher: Socket,
|
||||||
command_receiver: Socket,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ZmqHandler {
|
impl ZmqHandler {
|
||||||
@ -26,34 +25,22 @@ impl ZmqHandler {
|
|||||||
publisher.set_sndhwm(1000)?; // High water mark for outbound messages
|
publisher.set_sndhwm(1000)?; // High water mark for outbound messages
|
||||||
publisher.set_linger(1000)?; // Linger time on close
|
publisher.set_linger(1000)?; // Linger time on close
|
||||||
|
|
||||||
// Create command receiver socket (PULL socket to receive commands from dashboard)
|
|
||||||
let command_receiver = context.socket(SocketType::PULL)?;
|
|
||||||
let cmd_bind_address = format!("tcp://{}:{}", config.bind_address, config.command_port);
|
|
||||||
command_receiver.bind(&cmd_bind_address)?;
|
|
||||||
|
|
||||||
info!("ZMQ command receiver bound to {}", cmd_bind_address);
|
|
||||||
|
|
||||||
// Set non-blocking mode for command receiver
|
|
||||||
command_receiver.set_rcvtimeo(0)?; // Non-blocking receive
|
|
||||||
command_receiver.set_linger(1000)?;
|
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
publisher,
|
publisher,
|
||||||
command_receiver,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Publish metrics message via ZMQ
|
|
||||||
pub async fn publish_metrics(&self, message: &MetricMessage) -> Result<()> {
|
/// Publish agent data via ZMQ
|
||||||
|
pub async fn publish_agent_data(&self, data: &AgentData) -> Result<()> {
|
||||||
debug!(
|
debug!(
|
||||||
"Publishing {} metrics for host {}",
|
"Publishing agent data for host {}",
|
||||||
message.metrics.len(),
|
data.hostname
|
||||||
message.hostname
|
|
||||||
);
|
);
|
||||||
|
|
||||||
// Create message envelope
|
// Create message envelope for agent data
|
||||||
let envelope = MessageEnvelope::metrics(message.clone())
|
let envelope = MessageEnvelope::agent_data(data.clone())
|
||||||
.map_err(|e| anyhow::anyhow!("Failed to create message envelope: {}", e))?;
|
.map_err(|e| anyhow::anyhow!("Failed to create agent data envelope: {}", e))?;
|
||||||
|
|
||||||
// Serialize envelope
|
// Serialize envelope
|
||||||
let serialized = serde_json::to_vec(&envelope)?;
|
let serialized = serde_json::to_vec(&envelope)?;
|
||||||
@ -61,63 +48,8 @@ impl ZmqHandler {
|
|||||||
// Send via ZMQ
|
// Send via ZMQ
|
||||||
self.publisher.send(&serialized, 0)?;
|
self.publisher.send(&serialized, 0)?;
|
||||||
|
|
||||||
debug!("Published metrics message ({} bytes)", serialized.len());
|
debug!("Published agent data message ({} bytes)", serialized.len());
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Send heartbeat (placeholder for future use)
|
|
||||||
|
|
||||||
/// Try to receive a command (non-blocking)
|
|
||||||
pub fn try_receive_command(&self) -> Result<Option<AgentCommand>> {
|
|
||||||
match self.command_receiver.recv_bytes(zmq::DONTWAIT) {
|
|
||||||
Ok(bytes) => {
|
|
||||||
debug!("Received command message ({} bytes)", bytes.len());
|
|
||||||
|
|
||||||
let command: AgentCommand = serde_json::from_slice(&bytes)
|
|
||||||
.map_err(|e| anyhow::anyhow!("Failed to deserialize command: {}", e))?;
|
|
||||||
|
|
||||||
debug!("Parsed command: {:?}", command);
|
|
||||||
Ok(Some(command))
|
|
||||||
}
|
|
||||||
Err(zmq::Error::EAGAIN) => {
|
|
||||||
// No message available (non-blocking)
|
|
||||||
Ok(None)
|
|
||||||
}
|
|
||||||
Err(e) => Err(anyhow::anyhow!("ZMQ receive error: {}", e)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Commands that can be sent to the agent
|
|
||||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
|
||||||
pub enum AgentCommand {
|
|
||||||
/// Request immediate metric collection
|
|
||||||
CollectNow,
|
|
||||||
/// Change collection interval
|
|
||||||
SetInterval { seconds: u64 },
|
|
||||||
/// Enable/disable a collector
|
|
||||||
ToggleCollector { name: String, enabled: bool },
|
|
||||||
/// Request status/health check
|
|
||||||
Ping,
|
|
||||||
/// Control systemd service
|
|
||||||
ServiceControl {
|
|
||||||
service_name: String,
|
|
||||||
action: ServiceAction,
|
|
||||||
},
|
|
||||||
/// Rebuild NixOS system
|
|
||||||
SystemRebuild {
|
|
||||||
git_url: String,
|
|
||||||
git_branch: String,
|
|
||||||
working_dir: String,
|
|
||||||
api_key_file: Option<String>,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Service control actions
|
|
||||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
|
||||||
pub enum ServiceAction {
|
|
||||||
Start,
|
|
||||||
Stop,
|
|
||||||
Restart,
|
|
||||||
Status,
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,2 +0,0 @@
|
|||||||
// This file is now empty - all configuration values come from config files
|
|
||||||
// No hardcoded defaults are used
|
|
||||||
@ -6,8 +6,6 @@ use std::path::Path;
|
|||||||
pub mod loader;
|
pub mod loader;
|
||||||
pub mod validation;
|
pub mod validation;
|
||||||
|
|
||||||
use crate::status::HostStatusConfig;
|
|
||||||
|
|
||||||
/// Main agent configuration
|
/// Main agent configuration
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct AgentConfig {
|
pub struct AgentConfig {
|
||||||
@ -15,18 +13,17 @@ pub struct AgentConfig {
|
|||||||
pub collectors: CollectorConfig,
|
pub collectors: CollectorConfig,
|
||||||
pub cache: CacheConfig,
|
pub cache: CacheConfig,
|
||||||
pub notifications: NotificationConfig,
|
pub notifications: NotificationConfig,
|
||||||
pub status_aggregation: HostStatusConfig,
|
|
||||||
pub collection_interval_seconds: u64,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// ZMQ communication configuration
|
/// ZMQ communication configuration
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct ZmqConfig {
|
pub struct ZmqConfig {
|
||||||
pub publisher_port: u16,
|
pub publisher_port: u16,
|
||||||
pub command_port: u16,
|
|
||||||
pub bind_address: String,
|
pub bind_address: String,
|
||||||
pub timeout_ms: u64,
|
pub transmission_interval_seconds: u64,
|
||||||
pub heartbeat_interval_ms: u64,
|
/// Heartbeat transmission interval in seconds for host connectivity detection
|
||||||
|
#[serde(default = "default_heartbeat_interval_seconds")]
|
||||||
|
pub heartbeat_interval_seconds: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Collector configuration
|
/// Collector configuration
|
||||||
@ -72,7 +69,8 @@ pub struct DiskConfig {
|
|||||||
pub usage_warning_percent: f32,
|
pub usage_warning_percent: f32,
|
||||||
/// Disk usage critical threshold (percentage)
|
/// Disk usage critical threshold (percentage)
|
||||||
pub usage_critical_percent: f32,
|
pub usage_critical_percent: f32,
|
||||||
/// Filesystem configurations
|
/// Filesystem configurations (optional - auto-discovery used if empty)
|
||||||
|
#[serde(default)]
|
||||||
pub filesystems: Vec<FilesystemConfig>,
|
pub filesystems: Vec<FilesystemConfig>,
|
||||||
/// SMART monitoring thresholds
|
/// SMART monitoring thresholds
|
||||||
pub temperature_warning_celsius: f32,
|
pub temperature_warning_celsius: f32,
|
||||||
@ -104,6 +102,10 @@ pub struct SystemdConfig {
|
|||||||
pub memory_critical_mb: f32,
|
pub memory_critical_mb: f32,
|
||||||
pub service_directories: std::collections::HashMap<String, Vec<String>>,
|
pub service_directories: std::collections::HashMap<String, Vec<String>>,
|
||||||
pub host_user_mapping: String,
|
pub host_user_mapping: String,
|
||||||
|
pub nginx_check_interval_seconds: u64,
|
||||||
|
pub http_timeout_seconds: u64,
|
||||||
|
pub http_connect_timeout_seconds: u64,
|
||||||
|
pub nginx_latency_critical_ms: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -139,6 +141,23 @@ pub struct NotificationConfig {
|
|||||||
pub from_email: String,
|
pub from_email: String,
|
||||||
pub to_email: String,
|
pub to_email: String,
|
||||||
pub rate_limit_minutes: u64,
|
pub rate_limit_minutes: u64,
|
||||||
|
/// Email notification batching interval in seconds (default: 60)
|
||||||
|
pub aggregation_interval_seconds: u64,
|
||||||
|
/// List of metric names to exclude from email notifications
|
||||||
|
#[serde(default)]
|
||||||
|
pub exclude_email_metrics: Vec<String>,
|
||||||
|
/// Path to maintenance mode file that suppresses email notifications when present
|
||||||
|
#[serde(default = "default_maintenance_mode_file")]
|
||||||
|
pub maintenance_mode_file: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
fn default_heartbeat_interval_seconds() -> u64 {
|
||||||
|
5
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_maintenance_mode_file() -> String {
|
||||||
|
"/tmp/cm-maintenance".to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AgentConfig {
|
impl AgentConfig {
|
||||||
|
|||||||
@ -7,25 +7,13 @@ pub fn validate_config(config: &AgentConfig) -> Result<()> {
|
|||||||
bail!("ZMQ publisher port cannot be 0");
|
bail!("ZMQ publisher port cannot be 0");
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.zmq.command_port == 0 {
|
|
||||||
bail!("ZMQ command port cannot be 0");
|
|
||||||
}
|
|
||||||
|
|
||||||
if config.zmq.publisher_port == config.zmq.command_port {
|
|
||||||
bail!("ZMQ publisher and command ports cannot be the same");
|
|
||||||
}
|
|
||||||
|
|
||||||
if config.zmq.bind_address.is_empty() {
|
if config.zmq.bind_address.is_empty() {
|
||||||
bail!("ZMQ bind address cannot be empty");
|
bail!("ZMQ bind address cannot be empty");
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.zmq.timeout_ms == 0 {
|
// Validate ZMQ transmission interval
|
||||||
bail!("ZMQ timeout cannot be 0");
|
if config.zmq.transmission_interval_seconds == 0 {
|
||||||
}
|
bail!("ZMQ transmission interval cannot be 0");
|
||||||
|
|
||||||
// Validate collection interval
|
|
||||||
if config.collection_interval_seconds == 0 {
|
|
||||||
bail!("Collection interval cannot be 0");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate CPU thresholds
|
// Validate CPU thresholds
|
||||||
@ -83,6 +71,13 @@ pub fn validate_config(config: &AgentConfig) -> Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validate systemd configuration
|
||||||
|
if config.collectors.systemd.enabled {
|
||||||
|
if config.collectors.systemd.nginx_latency_critical_ms <= 0.0 {
|
||||||
|
bail!("Nginx latency critical threshold must be positive");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Validate SMTP configuration
|
// Validate SMTP configuration
|
||||||
if config.notifications.enabled {
|
if config.notifications.enabled {
|
||||||
if config.notifications.smtp_host.is_empty() {
|
if config.notifications.smtp_host.is_empty() {
|
||||||
|
|||||||
@ -7,16 +7,30 @@ mod agent;
|
|||||||
mod collectors;
|
mod collectors;
|
||||||
mod communication;
|
mod communication;
|
||||||
mod config;
|
mod config;
|
||||||
mod metrics;
|
|
||||||
mod notifications;
|
mod notifications;
|
||||||
mod status;
|
|
||||||
|
|
||||||
use agent::Agent;
|
use agent::Agent;
|
||||||
|
|
||||||
|
/// Get version showing cm-dashboard-agent package hash for easy deployment verification
|
||||||
|
fn get_version() -> &'static str {
|
||||||
|
// Get the path of the current executable
|
||||||
|
let exe_path = std::env::current_exe().expect("Failed to get executable path");
|
||||||
|
let exe_str = exe_path.to_string_lossy();
|
||||||
|
|
||||||
|
// Extract Nix store hash from path like /nix/store/HASH-cm-dashboard-v0.1.8/bin/cm-dashboard-agent
|
||||||
|
let hash_part = exe_str.strip_prefix("/nix/store/").expect("Not a nix store path");
|
||||||
|
let hash = hash_part.split('-').next().expect("Invalid nix store path format");
|
||||||
|
assert!(hash.len() >= 8, "Hash too short");
|
||||||
|
|
||||||
|
// Return first 8 characters of nix store hash
|
||||||
|
let short_hash = hash[..8].to_string();
|
||||||
|
Box::leak(short_hash.into_boxed_str())
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Parser)]
|
#[derive(Parser)]
|
||||||
#[command(name = "cm-dashboard-agent")]
|
#[command(name = "cm-dashboard-agent")]
|
||||||
#[command(about = "CM Dashboard metrics agent with individual metric collection")]
|
#[command(about = "CM Dashboard metrics agent with individual metric collection")]
|
||||||
#[command(version)]
|
#[command(version = get_version())]
|
||||||
struct Cli {
|
struct Cli {
|
||||||
/// Increase logging verbosity (-v, -vv)
|
/// Increase logging verbosity (-v, -vv)
|
||||||
#[arg(short, long, action = clap::ArgAction::Count)]
|
#[arg(short, long, action = clap::ArgAction::Count)]
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use cm_dashboard_shared::{Metric, StatusTracker};
|
use cm_dashboard_shared::{Metric, StatusTracker};
|
||||||
use tracing::{error, info};
|
use std::time::{Duration, Instant};
|
||||||
|
use tracing::{debug, error, info};
|
||||||
|
|
||||||
use crate::collectors::{
|
use crate::collectors::{
|
||||||
backup::BackupCollector, cpu::CpuCollector, disk::DiskCollector, memory::MemoryCollector,
|
backup::BackupCollector, cpu::CpuCollector, disk::DiskCollector, memory::MemoryCollector,
|
||||||
@ -8,15 +9,24 @@ use crate::collectors::{
|
|||||||
};
|
};
|
||||||
use crate::config::{AgentConfig, CollectorConfig};
|
use crate::config::{AgentConfig, CollectorConfig};
|
||||||
|
|
||||||
/// Manages all metric collectors
|
/// Collector with timing information
|
||||||
|
struct TimedCollector {
|
||||||
|
collector: Box<dyn Collector>,
|
||||||
|
interval: Duration,
|
||||||
|
last_collection: Option<Instant>,
|
||||||
|
name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Manages all metric collectors with individual intervals
|
||||||
pub struct MetricCollectionManager {
|
pub struct MetricCollectionManager {
|
||||||
collectors: Vec<Box<dyn Collector>>,
|
collectors: Vec<TimedCollector>,
|
||||||
status_tracker: StatusTracker,
|
status_tracker: StatusTracker,
|
||||||
|
cached_metrics: Vec<Metric>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MetricCollectionManager {
|
impl MetricCollectionManager {
|
||||||
pub async fn new(config: &CollectorConfig, _agent_config: &AgentConfig) -> Result<Self> {
|
pub async fn new(config: &CollectorConfig, _agent_config: &AgentConfig) -> Result<Self> {
|
||||||
let mut collectors: Vec<Box<dyn Collector>> = Vec::new();
|
let mut collectors: Vec<TimedCollector> = Vec::new();
|
||||||
|
|
||||||
// Benchmark mode - only enable specific collector based on env var
|
// Benchmark mode - only enable specific collector based on env var
|
||||||
let benchmark_mode = std::env::var("BENCHMARK_COLLECTOR").ok();
|
let benchmark_mode = std::env::var("BENCHMARK_COLLECTOR").ok();
|
||||||
@ -26,7 +36,12 @@ impl MetricCollectionManager {
|
|||||||
// CPU collector only
|
// CPU collector only
|
||||||
if config.cpu.enabled {
|
if config.cpu.enabled {
|
||||||
let cpu_collector = CpuCollector::new(config.cpu.clone());
|
let cpu_collector = CpuCollector::new(config.cpu.clone());
|
||||||
collectors.push(Box::new(cpu_collector));
|
collectors.push(TimedCollector {
|
||||||
|
collector: Box::new(cpu_collector),
|
||||||
|
interval: Duration::from_secs(config.cpu.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "CPU".to_string(),
|
||||||
|
});
|
||||||
info!("BENCHMARK: CPU collector only");
|
info!("BENCHMARK: CPU collector only");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -34,20 +49,35 @@ impl MetricCollectionManager {
|
|||||||
// Memory collector only
|
// Memory collector only
|
||||||
if config.memory.enabled {
|
if config.memory.enabled {
|
||||||
let memory_collector = MemoryCollector::new(config.memory.clone());
|
let memory_collector = MemoryCollector::new(config.memory.clone());
|
||||||
collectors.push(Box::new(memory_collector));
|
collectors.push(TimedCollector {
|
||||||
|
collector: Box::new(memory_collector),
|
||||||
|
interval: Duration::from_secs(config.memory.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Memory".to_string(),
|
||||||
|
});
|
||||||
info!("BENCHMARK: Memory collector only");
|
info!("BENCHMARK: Memory collector only");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some("disk") => {
|
Some("disk") => {
|
||||||
// Disk collector only
|
// Disk collector only
|
||||||
let disk_collector = DiskCollector::new(config.disk.clone());
|
let disk_collector = DiskCollector::new(config.disk.clone());
|
||||||
collectors.push(Box::new(disk_collector));
|
collectors.push(TimedCollector {
|
||||||
|
collector: Box::new(disk_collector),
|
||||||
|
interval: Duration::from_secs(config.disk.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Disk".to_string(),
|
||||||
|
});
|
||||||
info!("BENCHMARK: Disk collector only");
|
info!("BENCHMARK: Disk collector only");
|
||||||
}
|
}
|
||||||
Some("systemd") => {
|
Some("systemd") => {
|
||||||
// Systemd collector only
|
// Systemd collector only
|
||||||
let systemd_collector = SystemdCollector::new(config.systemd.clone());
|
let systemd_collector = SystemdCollector::new(config.systemd.clone());
|
||||||
collectors.push(Box::new(systemd_collector));
|
collectors.push(TimedCollector {
|
||||||
|
collector: Box::new(systemd_collector),
|
||||||
|
interval: Duration::from_secs(config.systemd.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Systemd".to_string(),
|
||||||
|
});
|
||||||
info!("BENCHMARK: Systemd collector only");
|
info!("BENCHMARK: Systemd collector only");
|
||||||
}
|
}
|
||||||
Some("backup") => {
|
Some("backup") => {
|
||||||
@ -57,7 +87,12 @@ impl MetricCollectionManager {
|
|||||||
config.backup.backup_paths.first().cloned(),
|
config.backup.backup_paths.first().cloned(),
|
||||||
config.backup.max_age_hours,
|
config.backup.max_age_hours,
|
||||||
);
|
);
|
||||||
collectors.push(Box::new(backup_collector));
|
collectors.push(TimedCollector {
|
||||||
|
collector: Box::new(backup_collector),
|
||||||
|
interval: Duration::from_secs(config.backup.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Backup".to_string(),
|
||||||
|
});
|
||||||
info!("BENCHMARK: Backup collector only");
|
info!("BENCHMARK: Backup collector only");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -69,37 +104,67 @@ impl MetricCollectionManager {
|
|||||||
// Normal mode - all collectors
|
// Normal mode - all collectors
|
||||||
if config.cpu.enabled {
|
if config.cpu.enabled {
|
||||||
let cpu_collector = CpuCollector::new(config.cpu.clone());
|
let cpu_collector = CpuCollector::new(config.cpu.clone());
|
||||||
collectors.push(Box::new(cpu_collector));
|
collectors.push(TimedCollector {
|
||||||
info!("CPU collector initialized");
|
collector: Box::new(cpu_collector),
|
||||||
|
interval: Duration::from_secs(config.cpu.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "CPU".to_string(),
|
||||||
|
});
|
||||||
|
info!("CPU collector initialized with {}s interval", config.cpu.interval_seconds);
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.memory.enabled {
|
if config.memory.enabled {
|
||||||
let memory_collector = MemoryCollector::new(config.memory.clone());
|
let memory_collector = MemoryCollector::new(config.memory.clone());
|
||||||
collectors.push(Box::new(memory_collector));
|
collectors.push(TimedCollector {
|
||||||
info!("Memory collector initialized");
|
collector: Box::new(memory_collector),
|
||||||
|
interval: Duration::from_secs(config.memory.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Memory".to_string(),
|
||||||
|
});
|
||||||
|
info!("Memory collector initialized with {}s interval", config.memory.interval_seconds);
|
||||||
}
|
}
|
||||||
|
|
||||||
let disk_collector = DiskCollector::new(config.disk.clone());
|
let disk_collector = DiskCollector::new(config.disk.clone());
|
||||||
collectors.push(Box::new(disk_collector));
|
collectors.push(TimedCollector {
|
||||||
info!("Disk collector initialized");
|
collector: Box::new(disk_collector),
|
||||||
|
interval: Duration::from_secs(config.disk.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Disk".to_string(),
|
||||||
|
});
|
||||||
|
info!("Disk collector initialized with {}s interval", config.disk.interval_seconds);
|
||||||
|
|
||||||
let systemd_collector = SystemdCollector::new(config.systemd.clone());
|
let systemd_collector = SystemdCollector::new(config.systemd.clone());
|
||||||
collectors.push(Box::new(systemd_collector));
|
collectors.push(TimedCollector {
|
||||||
info!("Systemd collector initialized");
|
collector: Box::new(systemd_collector),
|
||||||
|
interval: Duration::from_secs(config.systemd.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Systemd".to_string(),
|
||||||
|
});
|
||||||
|
info!("Systemd collector initialized with {}s interval", config.systemd.interval_seconds);
|
||||||
|
|
||||||
if config.backup.enabled {
|
if config.backup.enabled {
|
||||||
let backup_collector = BackupCollector::new(
|
let backup_collector = BackupCollector::new(
|
||||||
config.backup.backup_paths.first().cloned(),
|
config.backup.backup_paths.first().cloned(),
|
||||||
config.backup.max_age_hours,
|
config.backup.max_age_hours,
|
||||||
);
|
);
|
||||||
collectors.push(Box::new(backup_collector));
|
collectors.push(TimedCollector {
|
||||||
info!("Backup collector initialized");
|
collector: Box::new(backup_collector),
|
||||||
|
interval: Duration::from_secs(config.backup.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "Backup".to_string(),
|
||||||
|
});
|
||||||
|
info!("Backup collector initialized with {}s interval", config.backup.interval_seconds);
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.nixos.enabled {
|
if config.nixos.enabled {
|
||||||
let nixos_collector = NixOSCollector::new(config.nixos.clone());
|
let nixos_collector = NixOSCollector::new(config.nixos.clone());
|
||||||
collectors.push(Box::new(nixos_collector));
|
collectors.push(TimedCollector {
|
||||||
info!("NixOS collector initialized");
|
collector: Box::new(nixos_collector),
|
||||||
|
interval: Duration::from_secs(config.nixos.interval_seconds),
|
||||||
|
last_collection: None,
|
||||||
|
name: "NixOS".to_string(),
|
||||||
|
});
|
||||||
|
info!("NixOS collector initialized with {}s interval", config.nixos.interval_seconds);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -113,29 +178,89 @@ impl MetricCollectionManager {
|
|||||||
Ok(Self {
|
Ok(Self {
|
||||||
collectors,
|
collectors,
|
||||||
status_tracker: StatusTracker::new(),
|
status_tracker: StatusTracker::new(),
|
||||||
|
cached_metrics: Vec::new(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Force collection from ALL collectors immediately (used at startup)
|
/// Force collection from ALL collectors immediately (used at startup)
|
||||||
pub async fn collect_all_metrics_force(&mut self) -> Result<Vec<Metric>> {
|
pub async fn collect_all_metrics_force(&mut self) -> Result<Vec<Metric>> {
|
||||||
self.collect_all_metrics().await
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Collect metrics from all collectors
|
|
||||||
pub async fn collect_all_metrics(&mut self) -> Result<Vec<Metric>> {
|
|
||||||
let mut all_metrics = Vec::new();
|
let mut all_metrics = Vec::new();
|
||||||
|
let now = Instant::now();
|
||||||
|
|
||||||
for collector in &self.collectors {
|
for timed_collector in &mut self.collectors {
|
||||||
match collector.collect(&mut self.status_tracker).await {
|
match timed_collector.collector.collect(&mut self.status_tracker).await {
|
||||||
Ok(metrics) => {
|
Ok(metrics) => {
|
||||||
|
let metric_count = metrics.len();
|
||||||
all_metrics.extend(metrics);
|
all_metrics.extend(metrics);
|
||||||
|
timed_collector.last_collection = Some(now);
|
||||||
|
debug!("Force collected {} metrics from {}", metric_count, timed_collector.name);
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("Collector failed: {}", e);
|
error!("Collector {} failed: {}", timed_collector.name, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cache the collected metrics
|
||||||
|
self.cached_metrics = all_metrics.clone();
|
||||||
Ok(all_metrics)
|
Ok(all_metrics)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Collect metrics from collectors whose intervals have elapsed
|
||||||
|
pub async fn collect_metrics_timed(&mut self) -> Result<Vec<Metric>> {
|
||||||
|
let mut all_metrics = Vec::new();
|
||||||
|
let now = Instant::now();
|
||||||
|
|
||||||
|
for timed_collector in &mut self.collectors {
|
||||||
|
let should_collect = match timed_collector.last_collection {
|
||||||
|
None => true, // First collection
|
||||||
|
Some(last_time) => now.duration_since(last_time) >= timed_collector.interval,
|
||||||
|
};
|
||||||
|
|
||||||
|
if should_collect {
|
||||||
|
match timed_collector.collector.collect(&mut self.status_tracker).await {
|
||||||
|
Ok(metrics) => {
|
||||||
|
let metric_count = metrics.len();
|
||||||
|
all_metrics.extend(metrics);
|
||||||
|
timed_collector.last_collection = Some(now);
|
||||||
|
debug!(
|
||||||
|
"Collected {} metrics from {} ({}s interval)",
|
||||||
|
metric_count,
|
||||||
|
timed_collector.name,
|
||||||
|
timed_collector.interval.as_secs()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("Collector {} failed: {}", timed_collector.name, e);
|
||||||
|
// Update last_collection time even on failure to prevent immediate retries
|
||||||
|
timed_collector.last_collection = Some(now);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update cache with newly collected metrics
|
||||||
|
if !all_metrics.is_empty() {
|
||||||
|
// Merge new metrics with cached metrics (replace by name)
|
||||||
|
for new_metric in &all_metrics {
|
||||||
|
// Remove any existing metric with the same name
|
||||||
|
self.cached_metrics.retain(|cached| cached.name != new_metric.name);
|
||||||
|
// Add the new metric
|
||||||
|
self.cached_metrics.push(new_metric.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(all_metrics)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collect metrics from all collectors (legacy method for compatibility)
|
||||||
|
pub async fn collect_all_metrics(&mut self) -> Result<Vec<Metric>> {
|
||||||
|
self.collect_metrics_timed().await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get cached metrics without triggering fresh collection
|
||||||
|
pub fn get_cached_metrics(&self) -> Vec<Metric> {
|
||||||
|
self.cached_metrics.clone()
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -59,6 +59,6 @@ impl NotificationManager {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn is_maintenance_mode(&self) -> bool {
|
fn is_maintenance_mode(&self) -> bool {
|
||||||
std::fs::metadata("/tmp/cm-maintenance").is_ok()
|
std::fs::metadata(&self.config.maintenance_mode_file).is_ok()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -9,7 +9,6 @@ use chrono::Utc;
|
|||||||
pub struct HostStatusConfig {
|
pub struct HostStatusConfig {
|
||||||
pub enabled: bool,
|
pub enabled: bool,
|
||||||
pub aggregation_method: String, // "worst_case"
|
pub aggregation_method: String, // "worst_case"
|
||||||
pub notification_interval_seconds: u64,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for HostStatusConfig {
|
impl Default for HostStatusConfig {
|
||||||
@ -17,7 +16,6 @@ impl Default for HostStatusConfig {
|
|||||||
Self {
|
Self {
|
||||||
enabled: true,
|
enabled: true,
|
||||||
aggregation_method: "worst_case".to_string(),
|
aggregation_method: "worst_case".to_string(),
|
||||||
notification_interval_seconds: 30,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -160,25 +158,62 @@ impl HostStatusManager {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// Process a metric - updates status (notifications handled separately via batching)
|
/// Process a metric - updates status and queues for aggregated notifications if status changed
|
||||||
pub async fn process_metric(&mut self, metric: &Metric, _notification_manager: &mut crate::notifications::NotificationManager) {
|
pub async fn process_metric(&mut self, metric: &Metric, _notification_manager: &mut crate::notifications::NotificationManager) -> bool {
|
||||||
// Just update status - notifications are handled by process_pending_notifications
|
let old_service_status = self.service_statuses.get(&metric.name).copied();
|
||||||
self.update_service_status(metric.name.clone(), metric.status);
|
let old_host_status = self.current_host_status;
|
||||||
|
let new_service_status = metric.status;
|
||||||
|
|
||||||
|
// Update status (this recalculates host status internally)
|
||||||
|
self.update_service_status(metric.name.clone(), new_service_status);
|
||||||
|
|
||||||
|
let new_host_status = self.current_host_status;
|
||||||
|
let mut status_changed = false;
|
||||||
|
|
||||||
|
// Check if service status actually changed (ignore first-time status setting)
|
||||||
|
if let Some(old_service_status) = old_service_status {
|
||||||
|
if old_service_status != new_service_status {
|
||||||
|
debug!("Service status change detected for {}: {:?} -> {:?}", metric.name, old_service_status, new_service_status);
|
||||||
|
|
||||||
|
// Queue change for aggregated notification (not immediate)
|
||||||
|
self.queue_status_change(&metric.name, old_service_status, new_service_status);
|
||||||
|
|
||||||
|
status_changed = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
debug!("Initial status set for {}: {:?}", metric.name, new_service_status);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if host status changed (this should trigger immediate transmission)
|
||||||
|
if old_host_status != new_host_status {
|
||||||
|
debug!("Host status change detected: {:?} -> {:?}", old_host_status, new_host_status);
|
||||||
|
status_changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
status_changed // Return true if either service or host status changed
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Process pending notifications - call this at notification intervals
|
/// Queue status change for aggregated notification
|
||||||
|
fn queue_status_change(&mut self, metric_name: &str, old_status: Status, new_status: Status) {
|
||||||
|
// Add to pending changes for aggregated notification
|
||||||
|
let entry = self.pending_changes.entry(metric_name.to_string()).or_insert((old_status, old_status, 0));
|
||||||
|
entry.1 = new_status; // Update final status
|
||||||
|
entry.2 += 1; // Increment change count
|
||||||
|
|
||||||
|
// Set batch start time if this is the first change
|
||||||
|
if self.batch_start_time.is_none() {
|
||||||
|
self.batch_start_time = Some(Instant::now());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Process pending notifications - legacy method, now rarely used
|
||||||
pub async fn process_pending_notifications(&mut self, notification_manager: &mut crate::notifications::NotificationManager) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
pub async fn process_pending_notifications(&mut self, notification_manager: &mut crate::notifications::NotificationManager) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
if !self.config.enabled || self.pending_changes.is_empty() {
|
if !self.config.enabled || self.pending_changes.is_empty() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let batch_start = self.batch_start_time.unwrap_or_else(Instant::now);
|
// Process notifications immediately without interval batching
|
||||||
let batch_duration = batch_start.elapsed();
|
|
||||||
|
|
||||||
// Only process if enough time has passed
|
|
||||||
if batch_duration.as_secs() < self.config.notification_interval_seconds {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create aggregated status changes
|
// Create aggregated status changes
|
||||||
let aggregated = self.create_aggregated_changes();
|
let aggregated = self.create_aggregated_changes();
|
||||||
@ -237,11 +272,13 @@ impl HostStatusManager {
|
|||||||
/// Check if a status change is significant enough for notification
|
/// Check if a status change is significant enough for notification
|
||||||
fn is_significant_change(&self, old_status: Status, new_status: Status) -> bool {
|
fn is_significant_change(&self, old_status: Status, new_status: Status) -> bool {
|
||||||
match (old_status, new_status) {
|
match (old_status, new_status) {
|
||||||
// Always notify on problems
|
// Don't notify on transitions from Unknown (startup/restart scenario)
|
||||||
|
(Status::Unknown, _) => false,
|
||||||
|
// Always notify on problems (but not from Unknown)
|
||||||
(_, Status::Warning) | (_, Status::Critical) => true,
|
(_, Status::Warning) | (_, Status::Critical) => true,
|
||||||
// Only notify on recovery if it's from a problem state to OK and all services are OK
|
// Only notify on recovery if it's from a problem state to OK and all services are OK
|
||||||
(Status::Warning | Status::Critical, Status::Ok) => self.current_host_status == Status::Ok,
|
(Status::Warning | Status::Critical, Status::Ok) => self.current_host_status == Status::Ok,
|
||||||
// Don't notify on startup or other transitions
|
// Don't notify on other transitions
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -339,8 +376,8 @@ impl HostStatusManager {
|
|||||||
details.push('\n');
|
details.push('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Show recoveries
|
// Show recoveries only if host status is now OK (all services recovered)
|
||||||
if !recovery_changes.is_empty() {
|
if !recovery_changes.is_empty() && aggregated.host_status_final == Status::Ok {
|
||||||
details.push_str(&format!("✅ RECOVERIES ({}):\n", recovery_changes.len()));
|
details.push_str(&format!("✅ RECOVERIES ({}):\n", recovery_changes.len()));
|
||||||
for change in recovery_changes {
|
for change in recovery_changes {
|
||||||
details.push_str(&format!(" {}\n", change));
|
details.push_str(&format!(" {}\n", change));
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard"
|
name = "cm-dashboard"
|
||||||
version = "0.1.0"
|
version = "0.1.275"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
@ -18,4 +18,5 @@ tracing-subscriber = { workspace = true }
|
|||||||
ratatui = { workspace = true }
|
ratatui = { workspace = true }
|
||||||
crossterm = { workspace = true }
|
crossterm = { workspace = true }
|
||||||
toml = { workspace = true }
|
toml = { workspace = true }
|
||||||
gethostname = { workspace = true }
|
gethostname = { workspace = true }
|
||||||
|
wake-on-lan = "0.2"
|
||||||
@ -1,28 +1,30 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use crossterm::{
|
use crossterm::{
|
||||||
event::{self},
|
event::{self, EnableMouseCapture, DisableMouseCapture, Event, MouseEvent, MouseEventKind, MouseButton},
|
||||||
execute,
|
execute,
|
||||||
terminal::{disable_raw_mode, enable_raw_mode, EnterAlternateScreen, LeaveAlternateScreen},
|
terminal::{disable_raw_mode, enable_raw_mode, EnterAlternateScreen, LeaveAlternateScreen},
|
||||||
};
|
};
|
||||||
use ratatui::{backend::CrosstermBackend, Terminal};
|
use ratatui::{backend::CrosstermBackend, Terminal, layout::Rect};
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
use tracing::{debug, error, info, warn};
|
use tracing::{debug, error, info, warn};
|
||||||
|
|
||||||
use crate::communication::{AgentCommand, ServiceAction, ZmqCommandSender, ZmqConsumer};
|
use crate::communication::{ZmqConsumer};
|
||||||
use crate::config::DashboardConfig;
|
use crate::config::DashboardConfig;
|
||||||
use crate::metrics::MetricStore;
|
use crate::metrics::MetricStore;
|
||||||
use crate::ui::{TuiApp, UiCommand};
|
use crate::ui::TuiApp;
|
||||||
|
|
||||||
pub struct Dashboard {
|
pub struct Dashboard {
|
||||||
zmq_consumer: ZmqConsumer,
|
zmq_consumer: ZmqConsumer,
|
||||||
zmq_command_sender: ZmqCommandSender,
|
|
||||||
metric_store: MetricStore,
|
metric_store: MetricStore,
|
||||||
tui_app: Option<TuiApp>,
|
tui_app: Option<TuiApp>,
|
||||||
terminal: Option<Terminal<CrosstermBackend<io::Stdout>>>,
|
terminal: Option<Terminal<CrosstermBackend<io::Stdout>>>,
|
||||||
headless: bool,
|
headless: bool,
|
||||||
initial_commands_sent: std::collections::HashSet<String>,
|
initial_commands_sent: std::collections::HashSet<String>,
|
||||||
config: DashboardConfig,
|
config: DashboardConfig,
|
||||||
|
title_area: Rect, // Store title area for mouse event handling
|
||||||
|
system_area: Rect, // Store system area for mouse event handling
|
||||||
|
services_area: Rect, // Store services area for mouse event handling
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Dashboard {
|
impl Dashboard {
|
||||||
@ -58,20 +60,9 @@ impl Dashboard {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Initialize ZMQ command sender
|
|
||||||
let zmq_command_sender = match ZmqCommandSender::new(&config.zmq) {
|
|
||||||
Ok(sender) => sender,
|
|
||||||
Err(e) => {
|
|
||||||
error!("Failed to initialize ZMQ command sender: {}", e);
|
|
||||||
return Err(e);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Connect to predefined hosts from configuration
|
|
||||||
let hosts = config.hosts.predefined_hosts.clone();
|
|
||||||
|
|
||||||
// Try to connect to hosts but don't fail if none are available
|
// Try to connect to hosts but don't fail if none are available
|
||||||
match zmq_consumer.connect_to_predefined_hosts(&hosts).await {
|
match zmq_consumer.connect_to_predefined_hosts(&config.hosts).await {
|
||||||
Ok(_) => info!("Successfully connected to ZMQ hosts"),
|
Ok(_) => info!("Successfully connected to ZMQ hosts"),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!(
|
warn!(
|
||||||
@ -91,7 +82,7 @@ impl Dashboard {
|
|||||||
(None, None)
|
(None, None)
|
||||||
} else {
|
} else {
|
||||||
// Initialize TUI app
|
// Initialize TUI app
|
||||||
let tui_app = TuiApp::new();
|
let tui_app = TuiApp::new(config.clone());
|
||||||
|
|
||||||
// Setup terminal
|
// Setup terminal
|
||||||
if let Err(e) = enable_raw_mode() {
|
if let Err(e) = enable_raw_mode() {
|
||||||
@ -104,7 +95,7 @@ impl Dashboard {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut stdout = io::stdout();
|
let mut stdout = io::stdout();
|
||||||
if let Err(e) = execute!(stdout, EnterAlternateScreen) {
|
if let Err(e) = execute!(stdout, EnterAlternateScreen, EnableMouseCapture) {
|
||||||
error!("Failed to enter alternate screen: {}", e);
|
error!("Failed to enter alternate screen: {}", e);
|
||||||
let _ = disable_raw_mode();
|
let _ = disable_raw_mode();
|
||||||
return Err(e.into());
|
return Err(e.into());
|
||||||
@ -127,55 +118,65 @@ impl Dashboard {
|
|||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
zmq_consumer,
|
zmq_consumer,
|
||||||
zmq_command_sender,
|
|
||||||
metric_store,
|
metric_store,
|
||||||
tui_app,
|
tui_app,
|
||||||
terminal,
|
terminal,
|
||||||
headless,
|
headless,
|
||||||
initial_commands_sent: std::collections::HashSet::new(),
|
initial_commands_sent: std::collections::HashSet::new(),
|
||||||
config,
|
config,
|
||||||
|
title_area: Rect::default(),
|
||||||
|
system_area: Rect::default(),
|
||||||
|
services_area: Rect::default(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Send a command to a specific agent
|
|
||||||
pub async fn send_command(&mut self, hostname: &str, command: AgentCommand) -> Result<()> {
|
|
||||||
self.zmq_command_sender
|
|
||||||
.send_command(hostname, command)
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn run(&mut self) -> Result<()> {
|
pub async fn run(&mut self) -> Result<()> {
|
||||||
info!("Starting dashboard main loop");
|
info!("Starting dashboard main loop");
|
||||||
|
|
||||||
let mut last_metrics_check = Instant::now();
|
let mut last_metrics_check = Instant::now();
|
||||||
let metrics_check_interval = Duration::from_millis(100); // Check for metrics every 100ms
|
let metrics_check_interval = Duration::from_millis(100); // Check for metrics every 100ms
|
||||||
|
let mut last_heartbeat_check = Instant::now();
|
||||||
|
let heartbeat_check_interval = Duration::from_secs(1); // Check for host connectivity every 1 second
|
||||||
|
let mut needs_render = true; // Track if we need to render
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
// Handle terminal events (keyboard input) only if not headless
|
// Handle terminal events (keyboard and mouse input) only if not headless
|
||||||
if !self.headless {
|
if !self.headless {
|
||||||
match event::poll(Duration::from_millis(50)) {
|
match event::poll(Duration::from_millis(200)) {
|
||||||
Ok(true) => {
|
Ok(true) => {
|
||||||
match event::read() {
|
match event::read() {
|
||||||
Ok(event) => {
|
Ok(event) => {
|
||||||
if let Some(ref mut tui_app) = self.tui_app {
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
// Handle input and check for commands
|
match event {
|
||||||
match tui_app.handle_input(event) {
|
Event::Key(_) => {
|
||||||
Ok(Some(command)) => {
|
// Handle keyboard input
|
||||||
// Execute the command
|
match tui_app.handle_input(event) {
|
||||||
if let Err(e) = self.execute_ui_command(command).await {
|
Ok(_) => {
|
||||||
error!("Failed to execute UI command: {}", e);
|
needs_render = true;
|
||||||
|
// Check if we should quit
|
||||||
|
if tui_app.should_quit() {
|
||||||
|
info!("Quit requested, exiting dashboard");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("Error handling input: {}", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(None) => {
|
Event::Mouse(mouse_event) => {
|
||||||
// No command, check if we should quit
|
// Handle mouse events
|
||||||
if tui_app.should_quit() {
|
if let Err(e) = self.handle_mouse_event(mouse_event) {
|
||||||
info!("Quit requested, exiting dashboard");
|
error!("Error handling mouse event: {}", e);
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
needs_render = true;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Event::Resize(_width, _height) => {
|
||||||
error!("Error handling input: {}", e);
|
// Terminal was resized - mark for re-render
|
||||||
|
needs_render = true;
|
||||||
}
|
}
|
||||||
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -195,143 +196,480 @@ impl Dashboard {
|
|||||||
|
|
||||||
// Check for new metrics
|
// Check for new metrics
|
||||||
if last_metrics_check.elapsed() >= metrics_check_interval {
|
if last_metrics_check.elapsed() >= metrics_check_interval {
|
||||||
if let Ok(Some(metric_message)) = self.zmq_consumer.receive_metrics().await {
|
if let Ok(Some(agent_data)) = self.zmq_consumer.receive_agent_data().await {
|
||||||
debug!(
|
debug!(
|
||||||
"Received metrics from {}: {} metrics",
|
"Received agent data from {}",
|
||||||
metric_message.hostname,
|
agent_data.hostname
|
||||||
metric_message.metrics.len()
|
|
||||||
);
|
);
|
||||||
|
|
||||||
// Check if this is the first time we've seen this host
|
// Track first contact with host (no command needed - agent sends data every 2s)
|
||||||
let is_new_host = !self
|
let is_new_host = !self
|
||||||
.initial_commands_sent
|
.initial_commands_sent
|
||||||
.contains(&metric_message.hostname);
|
.contains(&agent_data.hostname);
|
||||||
|
|
||||||
if is_new_host {
|
if is_new_host {
|
||||||
info!(
|
info!(
|
||||||
"First contact with host {}, sending initial CollectNow command",
|
"First contact with host {} - data will update automatically",
|
||||||
metric_message.hostname
|
agent_data.hostname
|
||||||
);
|
);
|
||||||
|
self.initial_commands_sent
|
||||||
|
.insert(agent_data.hostname.clone());
|
||||||
|
}
|
||||||
|
|
||||||
// Send CollectNow command for immediate refresh
|
// Store structured data directly
|
||||||
if let Err(e) = self
|
self.metric_store.store_agent_data(agent_data);
|
||||||
.send_command(&metric_message.hostname, AgentCommand::CollectNow)
|
|
||||||
.await
|
// Check for agent version mismatches across hosts
|
||||||
{
|
if let Some((current_version, outdated_hosts)) = self.metric_store.get_version_mismatches() {
|
||||||
error!(
|
for outdated_host in &outdated_hosts {
|
||||||
"Failed to send initial CollectNow command to {}: {}",
|
warn!("Host {} has outdated agent version (current: {})", outdated_host, current_version);
|
||||||
metric_message.hostname, e
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
info!(
|
|
||||||
"✓ Sent initial CollectNow command to {}",
|
|
||||||
metric_message.hostname
|
|
||||||
);
|
|
||||||
self.initial_commands_sent
|
|
||||||
.insert(metric_message.hostname.clone());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update metric store
|
// Update TUI with new metrics (only if not headless)
|
||||||
self.metric_store
|
|
||||||
.update_metrics(&metric_message.hostname, metric_message.metrics);
|
|
||||||
|
|
||||||
// Update TUI with new hosts and metrics (only if not headless)
|
|
||||||
if let Some(ref mut tui_app) = self.tui_app {
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
let mut connected_hosts = self
|
tui_app.update_metrics(&mut self.metric_store);
|
||||||
.metric_store
|
|
||||||
.get_connected_hosts(Duration::from_secs(30));
|
|
||||||
|
|
||||||
// Add hosts that are rebuilding but may be temporarily disconnected
|
|
||||||
// Use extended timeout (5 minutes) for rebuilding hosts
|
|
||||||
let rebuilding_hosts = self
|
|
||||||
.metric_store
|
|
||||||
.get_connected_hosts(Duration::from_secs(300));
|
|
||||||
|
|
||||||
for host in rebuilding_hosts {
|
|
||||||
if !connected_hosts.contains(&host) {
|
|
||||||
// Check if this host is rebuilding in the UI
|
|
||||||
if tui_app.is_host_rebuilding(&host) {
|
|
||||||
connected_hosts.push(host);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
tui_app.update_hosts(connected_hosts);
|
|
||||||
tui_app.update_metrics(&self.metric_store);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
needs_render = true; // New metrics received, need to render
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Also check for command output messages
|
||||||
|
if let Ok(Some(cmd_output)) = self.zmq_consumer.receive_command_output().await {
|
||||||
|
debug!(
|
||||||
|
"Received command output from {}: {}",
|
||||||
|
cmd_output.hostname,
|
||||||
|
cmd_output.output_line
|
||||||
|
);
|
||||||
|
|
||||||
|
// Command output (terminal popup removed - output not displayed)
|
||||||
|
}
|
||||||
|
|
||||||
last_metrics_check = Instant::now();
|
last_metrics_check = Instant::now();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Render TUI (only if not headless)
|
// Check for host connectivity changes (heartbeat timeouts) periodically
|
||||||
if !self.headless {
|
if last_heartbeat_check.elapsed() >= heartbeat_check_interval {
|
||||||
if let (Some(ref mut terminal), Some(ref mut tui_app)) =
|
let timeout = Duration::from_secs(self.config.zmq.heartbeat_timeout_seconds);
|
||||||
(&mut self.terminal, &mut self.tui_app)
|
|
||||||
{
|
// Clean up metrics for offline hosts
|
||||||
if let Err(e) = terminal.draw(|frame| {
|
self.metric_store.cleanup_offline_hosts(timeout);
|
||||||
tui_app.render(frame, &self.metric_store);
|
|
||||||
}) {
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
error!("Error rendering TUI: {}", e);
|
let connected_hosts = self.metric_store.get_connected_hosts(timeout);
|
||||||
break;
|
tui_app.update_hosts(connected_hosts);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
last_heartbeat_check = Instant::now();
|
||||||
|
needs_render = true; // Heartbeat check happened, may have changed hosts
|
||||||
}
|
}
|
||||||
|
|
||||||
// Small sleep to prevent excessive CPU usage
|
// Render TUI only when needed (not headless and something changed)
|
||||||
tokio::time::sleep(Duration::from_millis(10)).await;
|
if !self.headless && needs_render {
|
||||||
|
if let Some(ref mut terminal) = self.terminal {
|
||||||
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
|
// Clear and autoresize terminal to handle any resize events
|
||||||
|
if let Err(e) = terminal.autoresize() {
|
||||||
|
warn!("Error autoresizing terminal: {}", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check minimum terminal size to prevent panics
|
||||||
|
let size = terminal.size().unwrap_or_default();
|
||||||
|
if size.width < 90 || size.height < 15 {
|
||||||
|
// Terminal too small, show error message
|
||||||
|
let msg_text = format!("Terminal too small\n\nMinimum: 90x15\nCurrent: {}x{}", size.width, size.height);
|
||||||
|
let _ = terminal.draw(|frame| {
|
||||||
|
use ratatui::widgets::{Paragraph, Block, Borders};
|
||||||
|
use ratatui::layout::Alignment;
|
||||||
|
let msg = Paragraph::new(msg_text.clone())
|
||||||
|
.alignment(Alignment::Center)
|
||||||
|
.block(Block::default().borders(Borders::ALL));
|
||||||
|
frame.render_widget(msg, frame.size());
|
||||||
|
});
|
||||||
|
} else if let Err(e) = terminal.draw(|frame| {
|
||||||
|
let (title_area, system_area, services_area) = tui_app.render(frame, &self.metric_store);
|
||||||
|
self.title_area = title_area;
|
||||||
|
self.system_area = system_area;
|
||||||
|
self.services_area = services_area;
|
||||||
|
}) {
|
||||||
|
error!("Error rendering TUI: {}", e);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
needs_render = false; // Reset flag after rendering
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
info!("Dashboard main loop ended");
|
info!("Dashboard main loop ended");
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Execute a UI command by sending it to the appropriate agent
|
/// Handle mouse events
|
||||||
async fn execute_ui_command(&self, command: UiCommand) -> Result<()> {
|
fn handle_mouse_event(&mut self, mouse: MouseEvent) -> Result<()> {
|
||||||
match command {
|
let x = mouse.column;
|
||||||
UiCommand::ServiceRestart { hostname, service_name } => {
|
let y = mouse.row;
|
||||||
info!("Sending restart command for service {} on {}", service_name, hostname);
|
|
||||||
let agent_command = AgentCommand::ServiceControl {
|
// Handle popup menu if open
|
||||||
service_name,
|
let popup_info = if let Some(ref tui_app) = self.tui_app {
|
||||||
action: ServiceAction::Restart,
|
tui_app.popup_menu.clone().map(|popup| {
|
||||||
};
|
let hostname = tui_app.current_host.clone();
|
||||||
self.zmq_command_sender.send_command(&hostname, agent_command).await?;
|
(popup, hostname)
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some((popup, hostname)) = popup_info {
|
||||||
|
// Calculate popup bounds using screen coordinates
|
||||||
|
let popup_width = 20;
|
||||||
|
let popup_height = 5; // 3 items + 2 borders
|
||||||
|
|
||||||
|
// Get terminal size
|
||||||
|
let (screen_width, screen_height) = if let Some(ref terminal) = self.terminal {
|
||||||
|
let size = terminal.size().unwrap_or_default();
|
||||||
|
(size.width, size.height)
|
||||||
|
} else {
|
||||||
|
(80, 24) // fallback
|
||||||
|
};
|
||||||
|
|
||||||
|
let popup_x = if popup.x + popup_width < screen_width {
|
||||||
|
popup.x
|
||||||
|
} else {
|
||||||
|
screen_width.saturating_sub(popup_width)
|
||||||
|
};
|
||||||
|
|
||||||
|
let popup_y = if popup.y + popup_height < screen_height {
|
||||||
|
popup.y
|
||||||
|
} else {
|
||||||
|
screen_height.saturating_sub(popup_height)
|
||||||
|
};
|
||||||
|
|
||||||
|
let popup_area = Rect {
|
||||||
|
x: popup_x,
|
||||||
|
y: popup_y,
|
||||||
|
width: popup_width,
|
||||||
|
height: popup_height,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Update selected index on mouse move
|
||||||
|
if matches!(mouse.kind, MouseEventKind::Moved) {
|
||||||
|
if is_in_area(x, y, &popup_area) {
|
||||||
|
let relative_y = y.saturating_sub(popup_y + 1) as usize; // +1 for top border
|
||||||
|
if relative_y < 3 {
|
||||||
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
|
if let Some(ref mut popup) = tui_app.popup_menu {
|
||||||
|
popup.selected_index = relative_y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
}
|
}
|
||||||
UiCommand::ServiceStart { hostname, service_name } => {
|
|
||||||
info!("Sending start command for service {} on {}", service_name, hostname);
|
if matches!(mouse.kind, MouseEventKind::Down(MouseButton::Left)) {
|
||||||
let agent_command = AgentCommand::ServiceControl {
|
if is_in_area(x, y, &popup_area) {
|
||||||
service_name: service_name.clone(),
|
// Click inside popup - execute action
|
||||||
action: ServiceAction::Start,
|
let relative_y = y.saturating_sub(popup_y + 1) as usize; // +1 for top border
|
||||||
};
|
if relative_y < 3 {
|
||||||
self.zmq_command_sender.send_command(&hostname, agent_command).await?;
|
// Execute the selected action
|
||||||
|
self.execute_service_action(relative_y, &popup.service_name, hostname.as_deref())?;
|
||||||
|
}
|
||||||
|
// Close popup after action
|
||||||
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
|
tui_app.popup_menu = None;
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
} else {
|
||||||
|
// Click outside popup - close it
|
||||||
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
|
tui_app.popup_menu = None;
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
UiCommand::ServiceStop { hostname, service_name } => {
|
|
||||||
info!("Sending stop command for service {} on {}", service_name, hostname);
|
// Any other event while popup is open - don't process panels
|
||||||
let agent_command = AgentCommand::ServiceControl {
|
return Ok(());
|
||||||
service_name: service_name.clone(),
|
}
|
||||||
action: ServiceAction::Stop,
|
|
||||||
};
|
// Check for title bar clicks (host selection)
|
||||||
self.zmq_command_sender.send_command(&hostname, agent_command).await?;
|
if matches!(mouse.kind, MouseEventKind::Down(MouseButton::Left)) {
|
||||||
}
|
if is_in_area(x, y, &self.title_area) {
|
||||||
UiCommand::SystemRebuild { hostname } => {
|
// Click in title bar - check if it's on a hostname
|
||||||
info!("Sending system rebuild command to {}", hostname);
|
// The title bar has "cm-dashboard vX.X.X" on the left (22 chars)
|
||||||
let agent_command = AgentCommand::SystemRebuild {
|
// Then hostnames start at position 22
|
||||||
git_url: self.config.system.nixos_config_git_url.clone(),
|
if x >= 22 {
|
||||||
git_branch: self.config.system.nixos_config_branch.clone(),
|
let hostname = self.find_hostname_at_position(x);
|
||||||
working_dir: self.config.system.nixos_config_working_dir.clone(),
|
if let Some(host) = hostname {
|
||||||
api_key_file: self.config.system.nixos_config_api_key_file.clone(),
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
};
|
tui_app.switch_to_host(&host);
|
||||||
self.zmq_command_sender.send_command(&hostname, agent_command).await?;
|
}
|
||||||
}
|
}
|
||||||
UiCommand::TriggerBackup { hostname } => {
|
}
|
||||||
info!("Trigger backup requested for {}", hostname);
|
return Ok(());
|
||||||
// TODO: Implement backup trigger command
|
|
||||||
info!("Backup trigger not yet implemented");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Determine which panel the mouse is over
|
||||||
|
let in_system_area = is_in_area(x, y, &self.system_area);
|
||||||
|
let in_services_area = is_in_area(x, y, &self.services_area);
|
||||||
|
|
||||||
|
if !in_system_area && !in_services_area {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle mouse events
|
||||||
|
match mouse.kind {
|
||||||
|
MouseEventKind::ScrollDown => {
|
||||||
|
if in_system_area {
|
||||||
|
// Scroll down in system panel
|
||||||
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
|
if let Some(hostname) = tui_app.current_host.clone() {
|
||||||
|
let host_widgets = tui_app.get_or_create_host_widgets(&hostname);
|
||||||
|
let visible_height = self.system_area.height as usize;
|
||||||
|
let total_lines = host_widgets.system_widget.get_total_lines();
|
||||||
|
host_widgets.system_widget.scroll_down(visible_height, total_lines);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if in_services_area {
|
||||||
|
// Scroll down in services panel
|
||||||
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
|
if let Some(hostname) = tui_app.current_host.clone() {
|
||||||
|
let host_widgets = tui_app.get_or_create_host_widgets(&hostname);
|
||||||
|
// Calculate visible height (panel height - borders and header)
|
||||||
|
let visible_height = self.services_area.height.saturating_sub(3) as usize;
|
||||||
|
host_widgets.services_widget.scroll_down(visible_height);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MouseEventKind::ScrollUp => {
|
||||||
|
if in_system_area {
|
||||||
|
// Scroll up in system panel
|
||||||
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
|
if let Some(hostname) = tui_app.current_host.clone() {
|
||||||
|
let host_widgets = tui_app.get_or_create_host_widgets(&hostname);
|
||||||
|
host_widgets.system_widget.scroll_up();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if in_services_area {
|
||||||
|
// Scroll up in services panel
|
||||||
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
|
if let Some(hostname) = tui_app.current_host.clone() {
|
||||||
|
let host_widgets = tui_app.get_or_create_host_widgets(&hostname);
|
||||||
|
host_widgets.services_widget.scroll_up();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MouseEventKind::Down(button) => {
|
||||||
|
// Only handle clicks in services area (not system area)
|
||||||
|
if !in_services_area {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate which service was clicked
|
||||||
|
// The services area includes a border, so we need to account for that
|
||||||
|
let relative_y = y.saturating_sub(self.services_area.y + 2) as usize; // +2 for border and header
|
||||||
|
|
||||||
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
|
if let Some(hostname) = tui_app.current_host.clone() {
|
||||||
|
let host_widgets = tui_app.get_or_create_host_widgets(&hostname);
|
||||||
|
|
||||||
|
// Account for scroll offset - the clicked line is relative to viewport
|
||||||
|
let display_line_index = host_widgets.services_widget.scroll_offset + relative_y;
|
||||||
|
|
||||||
|
// Map display line to parent service index
|
||||||
|
if let Some(parent_index) = host_widgets.services_widget.display_line_to_parent_index(display_line_index) {
|
||||||
|
// Set the selected index to the clicked parent service
|
||||||
|
host_widgets.services_widget.selected_index = parent_index;
|
||||||
|
|
||||||
|
match button {
|
||||||
|
MouseButton::Left => {
|
||||||
|
// Left click just selects the service
|
||||||
|
debug!("Left-clicked service at display line {} (parent index: {})", display_line_index, parent_index);
|
||||||
|
}
|
||||||
|
MouseButton::Right => {
|
||||||
|
// Right click opens context menu
|
||||||
|
debug!("Right-clicked service at display line {} (parent index: {})", display_line_index, parent_index);
|
||||||
|
|
||||||
|
// Get the service name for the popup
|
||||||
|
if let Some(service_name) = host_widgets.services_widget.get_selected_service() {
|
||||||
|
tui_app.popup_menu = Some(crate::ui::PopupMenu {
|
||||||
|
service_name,
|
||||||
|
x,
|
||||||
|
y,
|
||||||
|
selected_index: 0,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Execute service action from popup menu
|
||||||
|
fn execute_service_action(&self, action_index: usize, service_name: &str, hostname: Option<&str>) -> Result<()> {
|
||||||
|
let Some(hostname) = hostname else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
|
||||||
|
let connection_ip = self.get_connection_ip(hostname);
|
||||||
|
|
||||||
|
match action_index {
|
||||||
|
0 => {
|
||||||
|
// Start Service
|
||||||
|
let service_start_command = format!(
|
||||||
|
"echo 'Starting service: {} on {}' && ssh -tt {}@{} \"bash -ic '{} start {}'\"",
|
||||||
|
service_name,
|
||||||
|
hostname,
|
||||||
|
self.config.ssh.rebuild_user,
|
||||||
|
connection_ip,
|
||||||
|
self.config.ssh.service_manage_cmd,
|
||||||
|
service_name
|
||||||
|
);
|
||||||
|
|
||||||
|
std::process::Command::new("tmux")
|
||||||
|
.arg("split-window")
|
||||||
|
.arg("-v")
|
||||||
|
.arg("-p")
|
||||||
|
.arg("30")
|
||||||
|
.arg(&service_start_command)
|
||||||
|
.spawn()
|
||||||
|
.ok();
|
||||||
|
}
|
||||||
|
1 => {
|
||||||
|
// Stop Service
|
||||||
|
let service_stop_command = format!(
|
||||||
|
"echo 'Stopping service: {} on {}' && ssh -tt {}@{} \"bash -ic '{} stop {}'\"",
|
||||||
|
service_name,
|
||||||
|
hostname,
|
||||||
|
self.config.ssh.rebuild_user,
|
||||||
|
connection_ip,
|
||||||
|
self.config.ssh.service_manage_cmd,
|
||||||
|
service_name
|
||||||
|
);
|
||||||
|
|
||||||
|
std::process::Command::new("tmux")
|
||||||
|
.arg("split-window")
|
||||||
|
.arg("-v")
|
||||||
|
.arg("-p")
|
||||||
|
.arg("30")
|
||||||
|
.arg(&service_stop_command)
|
||||||
|
.spawn()
|
||||||
|
.ok();
|
||||||
|
}
|
||||||
|
2 => {
|
||||||
|
// View Logs
|
||||||
|
let logs_command = format!(
|
||||||
|
"ssh -tt {}@{} '{} logs {}'",
|
||||||
|
self.config.ssh.rebuild_user,
|
||||||
|
connection_ip,
|
||||||
|
self.config.ssh.service_manage_cmd,
|
||||||
|
service_name
|
||||||
|
);
|
||||||
|
|
||||||
|
std::process::Command::new("tmux")
|
||||||
|
.arg("split-window")
|
||||||
|
.arg("-v")
|
||||||
|
.arg("-p")
|
||||||
|
.arg("30")
|
||||||
|
.arg(&logs_command)
|
||||||
|
.spawn()
|
||||||
|
.ok();
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get connection IP for a host
|
||||||
|
fn get_connection_ip(&self, hostname: &str) -> String {
|
||||||
|
self.config
|
||||||
|
.hosts
|
||||||
|
.get(hostname)
|
||||||
|
.and_then(|h| h.ip.clone())
|
||||||
|
.unwrap_or_else(|| hostname.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find which hostname is at a given x position in the title bar
|
||||||
|
fn find_hostname_at_position(&self, x: u16) -> Option<String> {
|
||||||
|
if let Some(ref tui_app) = self.tui_app {
|
||||||
|
// The hosts are RIGHT-ALIGNED in chunks[1]!
|
||||||
|
// Need to calculate total width first, then right-align
|
||||||
|
|
||||||
|
// Get terminal width
|
||||||
|
let terminal_width = if let Some(ref terminal) = self.terminal {
|
||||||
|
terminal.size().unwrap_or_default().width
|
||||||
|
} else {
|
||||||
|
80
|
||||||
|
};
|
||||||
|
|
||||||
|
// Calculate total width of all host text
|
||||||
|
let mut total_width = 0_u16;
|
||||||
|
for (i, host) in tui_app.get_available_hosts().iter().enumerate() {
|
||||||
|
if i > 0 {
|
||||||
|
total_width += 1; // space between hosts
|
||||||
|
}
|
||||||
|
total_width += 2; // icon + space
|
||||||
|
let is_selected = Some(host) == tui_app.current_host.as_ref();
|
||||||
|
if is_selected {
|
||||||
|
total_width += 1 + host.len() as u16 + 1; // [hostname]
|
||||||
|
} else {
|
||||||
|
total_width += host.len() as u16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
total_width += 1; // right padding
|
||||||
|
|
||||||
|
// chunks[1] starts at 22, has width of (terminal_width - 22)
|
||||||
|
let chunk_width = terminal_width - 22;
|
||||||
|
|
||||||
|
// Right-aligned position
|
||||||
|
let hosts_start_x = if total_width < chunk_width {
|
||||||
|
22 + (chunk_width - total_width)
|
||||||
|
} else {
|
||||||
|
22
|
||||||
|
};
|
||||||
|
|
||||||
|
// Now calculate positions starting from hosts_start_x
|
||||||
|
let mut pos = hosts_start_x;
|
||||||
|
|
||||||
|
for (i, host) in tui_app.get_available_hosts().iter().enumerate() {
|
||||||
|
if i > 0 {
|
||||||
|
pos += 1; // " "
|
||||||
|
}
|
||||||
|
|
||||||
|
let host_start = pos;
|
||||||
|
pos += 2; // "● "
|
||||||
|
|
||||||
|
let is_selected = Some(host) == tui_app.current_host.as_ref();
|
||||||
|
if is_selected {
|
||||||
|
pos += 1 + host.len() as u16 + 1; // [hostname]
|
||||||
|
} else {
|
||||||
|
pos += host.len() as u16;
|
||||||
|
}
|
||||||
|
|
||||||
|
if x >= host_start && x < pos {
|
||||||
|
return Some(host.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if a point is within a rectangular area
|
||||||
|
fn is_in_area(x: u16, y: u16, area: &Rect) -> bool {
|
||||||
|
x >= area.x && x < area.x + area.width
|
||||||
|
&& y >= area.y && y < area.y + area.height
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Drop for Dashboard {
|
impl Drop for Dashboard {
|
||||||
@ -340,7 +678,7 @@ impl Drop for Dashboard {
|
|||||||
if !self.headless {
|
if !self.headless {
|
||||||
let _ = disable_raw_mode();
|
let _ = disable_raw_mode();
|
||||||
if let Some(ref mut terminal) = self.terminal {
|
if let Some(ref mut terminal) = self.terminal {
|
||||||
let _ = execute!(terminal.backend_mut(), LeaveAlternateScreen);
|
let _ = execute!(terminal.backend_mut(), LeaveAlternateScreen, DisableMouseCapture);
|
||||||
let _ = terminal.show_cursor();
|
let _ = terminal.show_cursor();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,43 +1,10 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use cm_dashboard_shared::{MessageEnvelope, MessageType, MetricMessage};
|
use cm_dashboard_shared::{AgentData, CommandOutputMessage, MessageEnvelope, MessageType};
|
||||||
use tracing::{debug, error, info, warn};
|
use tracing::{debug, error, info, warn};
|
||||||
use zmq::{Context, Socket, SocketType};
|
use zmq::{Context, Socket, SocketType};
|
||||||
|
|
||||||
use crate::config::ZmqConfig;
|
use crate::config::ZmqConfig;
|
||||||
|
|
||||||
/// Commands that can be sent to agents
|
|
||||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
|
||||||
pub enum AgentCommand {
|
|
||||||
/// Request immediate metric collection
|
|
||||||
CollectNow,
|
|
||||||
/// Change collection interval
|
|
||||||
SetInterval { seconds: u64 },
|
|
||||||
/// Enable/disable a collector
|
|
||||||
ToggleCollector { name: String, enabled: bool },
|
|
||||||
/// Request status/health check
|
|
||||||
Ping,
|
|
||||||
/// Control systemd service
|
|
||||||
ServiceControl {
|
|
||||||
service_name: String,
|
|
||||||
action: ServiceAction,
|
|
||||||
},
|
|
||||||
/// Rebuild NixOS system
|
|
||||||
SystemRebuild {
|
|
||||||
git_url: String,
|
|
||||||
git_branch: String,
|
|
||||||
working_dir: String,
|
|
||||||
api_key_file: Option<String>,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Service control actions
|
|
||||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
|
||||||
pub enum ServiceAction {
|
|
||||||
Start,
|
|
||||||
Stop,
|
|
||||||
Restart,
|
|
||||||
Status,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// ZMQ consumer for receiving metrics from agents
|
/// ZMQ consumer for receiving metrics from agents
|
||||||
pub struct ZmqConsumer {
|
pub struct ZmqConsumer {
|
||||||
@ -83,13 +50,14 @@ impl ZmqConsumer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Connect to predefined hosts
|
|
||||||
pub async fn connect_to_predefined_hosts(&mut self, hosts: &[String]) -> Result<()> {
|
/// Connect to predefined hosts using their configuration
|
||||||
|
pub async fn connect_to_predefined_hosts(&mut self, hosts: &std::collections::HashMap<String, crate::config::HostDetails>) -> Result<()> {
|
||||||
let default_port = self.config.subscriber_ports[0];
|
let default_port = self.config.subscriber_ports[0];
|
||||||
|
|
||||||
for hostname in hosts {
|
for (hostname, host_details) in hosts {
|
||||||
// Try to connect, but don't fail if some hosts are unreachable
|
// Try to connect using configured IP, but don't fail if some hosts are unreachable
|
||||||
if let Err(e) = self.connect_to_host(hostname, default_port).await {
|
if let Err(e) = self.connect_to_host_with_details(hostname, host_details, default_port).await {
|
||||||
warn!("Could not connect to {}: {}", hostname, e);
|
warn!("Could not connect to {}: {}", hostname, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -103,8 +71,54 @@ impl ZmqConsumer {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Receive metrics from any connected agent (non-blocking)
|
/// Connect to a host using its configuration details
|
||||||
pub async fn receive_metrics(&mut self) -> Result<Option<MetricMessage>> {
|
pub async fn connect_to_host_with_details(&mut self, hostname: &str, host_details: &crate::config::HostDetails, port: u16) -> Result<()> {
|
||||||
|
// Get primary connection IP only - no fallbacks
|
||||||
|
let primary_ip = host_details.get_connection_ip(hostname);
|
||||||
|
|
||||||
|
// Connect directly without fallback attempts
|
||||||
|
self.connect_to_host(&primary_ip, port).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Receive command output from any connected agent (non-blocking)
|
||||||
|
pub async fn receive_command_output(&mut self) -> Result<Option<CommandOutputMessage>> {
|
||||||
|
match self.subscriber.recv_bytes(zmq::DONTWAIT) {
|
||||||
|
Ok(data) => {
|
||||||
|
// Deserialize envelope
|
||||||
|
let envelope: MessageEnvelope = serde_json::from_slice(&data)
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to deserialize envelope: {}", e))?;
|
||||||
|
|
||||||
|
// Check message type
|
||||||
|
match envelope.message_type {
|
||||||
|
MessageType::CommandOutput => {
|
||||||
|
let cmd_output = envelope
|
||||||
|
.decode_command_output()
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to decode command output: {}", e))?;
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"Received command output from {}: {}",
|
||||||
|
cmd_output.hostname,
|
||||||
|
cmd_output.output_line
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(Some(cmd_output))
|
||||||
|
}
|
||||||
|
_ => Ok(None), // Not a command output message
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(zmq::Error::EAGAIN) => {
|
||||||
|
// No message available (non-blocking mode)
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("ZMQ receive error: {}", e);
|
||||||
|
Err(anyhow::anyhow!("ZMQ receive error: {}", e))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Receive agent data (non-blocking)
|
||||||
|
pub async fn receive_agent_data(&mut self) -> Result<Option<AgentData>> {
|
||||||
match self.subscriber.recv_bytes(zmq::DONTWAIT) {
|
match self.subscriber.recv_bytes(zmq::DONTWAIT) {
|
||||||
Ok(data) => {
|
Ok(data) => {
|
||||||
debug!("Received {} bytes from ZMQ", data.len());
|
debug!("Received {} bytes from ZMQ", data.len());
|
||||||
@ -115,25 +129,27 @@ impl ZmqConsumer {
|
|||||||
|
|
||||||
// Check message type
|
// Check message type
|
||||||
match envelope.message_type {
|
match envelope.message_type {
|
||||||
MessageType::Metrics => {
|
MessageType::AgentData => {
|
||||||
let metrics = envelope
|
let agent_data = envelope
|
||||||
.decode_metrics()
|
.decode_agent_data()
|
||||||
.map_err(|e| anyhow::anyhow!("Failed to decode metrics: {}", e))?;
|
.map_err(|e| anyhow::anyhow!("Failed to decode agent data: {}", e))?;
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
"Received {} metrics from {}",
|
"Received agent data from host {}",
|
||||||
metrics.metrics.len(),
|
agent_data.hostname
|
||||||
metrics.hostname
|
|
||||||
);
|
);
|
||||||
|
Ok(Some(agent_data))
|
||||||
Ok(Some(metrics))
|
|
||||||
}
|
}
|
||||||
MessageType::Heartbeat => {
|
MessageType::Heartbeat => {
|
||||||
debug!("Received heartbeat");
|
debug!("Received heartbeat");
|
||||||
Ok(None) // Don't return heartbeats as metrics
|
Ok(None) // Don't return heartbeats
|
||||||
|
}
|
||||||
|
MessageType::CommandOutput => {
|
||||||
|
debug!("Received command output (will be handled by receive_command_output)");
|
||||||
|
Ok(None) // Command output handled by separate method
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
debug!("Received non-metrics message: {:?}", envelope.message_type);
|
debug!("Received unsupported message: {:?}", envelope.message_type);
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -148,44 +164,6 @@ impl ZmqConsumer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// ZMQ command sender for sending commands to agents
|
|
||||||
pub struct ZmqCommandSender {
|
|
||||||
context: Context,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ZmqCommandSender {
|
|
||||||
pub fn new(_config: &ZmqConfig) -> Result<Self> {
|
|
||||||
let context = Context::new();
|
|
||||||
|
|
||||||
info!("ZMQ command sender initialized");
|
|
||||||
|
|
||||||
Ok(Self { context })
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Send a command to a specific agent
|
|
||||||
pub async fn send_command(&self, hostname: &str, command: AgentCommand) -> Result<()> {
|
|
||||||
// Create a new PUSH socket for this command (ZMQ best practice)
|
|
||||||
let socket = self.context.socket(SocketType::PUSH)?;
|
|
||||||
|
|
||||||
// Set socket options
|
|
||||||
socket.set_linger(1000)?; // Wait up to 1 second on close
|
|
||||||
socket.set_sndtimeo(5000)?; // 5 second send timeout
|
|
||||||
|
|
||||||
// Connect to agent's command port (6131)
|
|
||||||
let address = format!("tcp://{}:6131", hostname);
|
|
||||||
socket.connect(&address)?;
|
|
||||||
|
|
||||||
// Serialize command
|
|
||||||
let serialized = serde_json::to_vec(&command)?;
|
|
||||||
|
|
||||||
// Send command
|
|
||||||
socket.send(&serialized, 0)?;
|
|
||||||
|
|
||||||
info!("Sent command {:?} to agent at {}", command, hostname);
|
|
||||||
|
|
||||||
// Socket will be automatically closed when dropped
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@ -6,20 +6,40 @@ use std::path::Path;
|
|||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct DashboardConfig {
|
pub struct DashboardConfig {
|
||||||
pub zmq: ZmqConfig,
|
pub zmq: ZmqConfig,
|
||||||
pub hosts: HostsConfig,
|
pub hosts: std::collections::HashMap<String, HostDetails>,
|
||||||
pub system: SystemConfig,
|
pub system: SystemConfig,
|
||||||
|
pub ssh: SshConfig,
|
||||||
|
pub service_logs: std::collections::HashMap<String, Vec<ServiceLogConfig>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// ZMQ consumer configuration
|
/// ZMQ consumer configuration
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct ZmqConfig {
|
pub struct ZmqConfig {
|
||||||
pub subscriber_ports: Vec<u16>,
|
pub subscriber_ports: Vec<u16>,
|
||||||
|
/// Heartbeat timeout in seconds - hosts considered offline if no heartbeat received within this time
|
||||||
|
#[serde(default = "default_heartbeat_timeout_seconds")]
|
||||||
|
pub heartbeat_timeout_seconds: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Hosts configuration
|
fn default_heartbeat_timeout_seconds() -> u64 {
|
||||||
|
10 // Default to 10 seconds - allows for multiple missed heartbeats
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Individual host configuration details
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct HostsConfig {
|
pub struct HostDetails {
|
||||||
pub predefined_hosts: Vec<String>,
|
pub mac_address: Option<String>,
|
||||||
|
/// Primary IP address (local network)
|
||||||
|
pub ip: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
impl HostDetails {
|
||||||
|
/// Get the IP address for connection (uses ip field or hostname as fallback)
|
||||||
|
pub fn get_connection_ip(&self, hostname: &str) -> String {
|
||||||
|
self.ip.as_ref().unwrap_or(&hostname.to_string()).clone()
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// System configuration
|
/// System configuration
|
||||||
@ -31,6 +51,21 @@ pub struct SystemConfig {
|
|||||||
pub nixos_config_api_key_file: Option<String>,
|
pub nixos_config_api_key_file: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// SSH configuration for rebuild and backup operations
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct SshConfig {
|
||||||
|
pub rebuild_user: String,
|
||||||
|
pub rebuild_cmd: String,
|
||||||
|
pub service_manage_cmd: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Service log file configuration per host
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct ServiceLogConfig {
|
||||||
|
pub service_name: String,
|
||||||
|
pub log_file_path: String,
|
||||||
|
}
|
||||||
|
|
||||||
impl DashboardConfig {
|
impl DashboardConfig {
|
||||||
pub fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
pub fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
@ -52,8 +87,3 @@ impl Default for ZmqConfig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for HostsConfig {
|
|
||||||
fn default() -> Self {
|
|
||||||
panic!("Dashboard configuration must be loaded from file - no hardcoded defaults allowed")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
|
use std::process;
|
||||||
use tracing::{error, info};
|
use tracing::{error, info};
|
||||||
use tracing_subscriber::EnvFilter;
|
use tracing_subscriber::EnvFilter;
|
||||||
|
|
||||||
@ -11,26 +12,33 @@ mod ui;
|
|||||||
|
|
||||||
use app::Dashboard;
|
use app::Dashboard;
|
||||||
|
|
||||||
/// Get version showing cm-dashboard package hash for easy rebuild verification
|
|
||||||
fn get_version() -> &'static str {
|
/// Check if running inside tmux session
|
||||||
// Get the path of the current executable
|
fn check_tmux_session() {
|
||||||
let exe_path = std::env::current_exe().expect("Failed to get executable path");
|
// Check for TMUX environment variable which is set when inside a tmux session
|
||||||
let exe_str = exe_path.to_string_lossy();
|
if std::env::var("TMUX").is_err() {
|
||||||
|
eprintln!("╭─────────────────────────────────────────────────────────────╮");
|
||||||
// Extract Nix store hash from path like /nix/store/HASH-cm-dashboard-0.1.0/bin/cm-dashboard
|
eprintln!("│ ⚠️ TMUX REQUIRED │");
|
||||||
let hash_part = exe_str.strip_prefix("/nix/store/").expect("Not a nix store path");
|
eprintln!("├─────────────────────────────────────────────────────────────┤");
|
||||||
let hash = hash_part.split('-').next().expect("Invalid nix store path format");
|
eprintln!("│ CM Dashboard must be run inside a tmux session for proper │");
|
||||||
assert!(hash.len() >= 8, "Hash too short");
|
eprintln!("│ terminal handling and remote operation functionality. │");
|
||||||
|
eprintln!("│ │");
|
||||||
// Return first 8 characters of nix store hash
|
eprintln!("│ Please start a tmux session first: │");
|
||||||
let short_hash = hash[..8].to_string();
|
eprintln!("│ tmux new-session -d -s dashboard cm-dashboard │");
|
||||||
Box::leak(short_hash.into_boxed_str())
|
eprintln!("│ tmux attach-session -t dashboard │");
|
||||||
|
eprintln!("│ │");
|
||||||
|
eprintln!("│ Or simply: │");
|
||||||
|
eprintln!("│ tmux │");
|
||||||
|
eprintln!("│ cm-dashboard │");
|
||||||
|
eprintln!("╰─────────────────────────────────────────────────────────────╯");
|
||||||
|
process::exit(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Parser)]
|
#[derive(Parser)]
|
||||||
#[command(name = "cm-dashboard")]
|
#[command(name = "cm-dashboard")]
|
||||||
#[command(about = "CM Dashboard TUI with individual metric consumption")]
|
#[command(about = "CM Dashboard TUI with individual metric consumption")]
|
||||||
#[command(version = get_version())]
|
#[command(version)]
|
||||||
struct Cli {
|
struct Cli {
|
||||||
/// Increase logging verbosity (-v, -vv)
|
/// Increase logging verbosity (-v, -vv)
|
||||||
#[arg(short, long, action = clap::ArgAction::Count)]
|
#[arg(short, long, action = clap::ArgAction::Count)]
|
||||||
@ -68,6 +76,11 @@ async fn main() -> Result<()> {
|
|||||||
.init();
|
.init();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for tmux session requirement (only for TUI mode)
|
||||||
|
if !cli.headless {
|
||||||
|
check_tmux_session();
|
||||||
|
}
|
||||||
|
|
||||||
if cli.headless || cli.verbose > 0 {
|
if cli.headless || cli.verbose > 0 {
|
||||||
info!("CM Dashboard starting with individual metrics architecture...");
|
info!("CM Dashboard starting with individual metrics architecture...");
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,18 +1,28 @@
|
|||||||
use cm_dashboard_shared::Metric;
|
use cm_dashboard_shared::AgentData;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
use tracing::{debug, info, warn};
|
use tracing::{debug, info, warn};
|
||||||
|
|
||||||
use super::MetricDataPoint;
|
use super::MetricDataPoint;
|
||||||
|
|
||||||
|
/// ZMQ communication statistics per host
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct ZmqStats {
|
||||||
|
pub packets_received: u64,
|
||||||
|
pub last_packet_time: Instant,
|
||||||
|
pub last_packet_age_secs: f64,
|
||||||
|
}
|
||||||
|
|
||||||
/// Central metric storage for the dashboard
|
/// Central metric storage for the dashboard
|
||||||
pub struct MetricStore {
|
pub struct MetricStore {
|
||||||
/// Current metrics: hostname -> metric_name -> metric
|
/// Current structured data: hostname -> AgentData
|
||||||
current_metrics: HashMap<String, HashMap<String, Metric>>,
|
current_agent_data: HashMap<String, AgentData>,
|
||||||
/// Historical metrics for trending
|
/// Historical metrics for trending
|
||||||
historical_metrics: HashMap<String, Vec<MetricDataPoint>>,
|
historical_metrics: HashMap<String, Vec<MetricDataPoint>>,
|
||||||
/// Last update timestamp per host
|
/// Last heartbeat timestamp per host
|
||||||
last_update: HashMap<String, Instant>,
|
last_heartbeat: HashMap<String, Instant>,
|
||||||
|
/// ZMQ communication statistics per host
|
||||||
|
zmq_stats: HashMap<String, ZmqStats>,
|
||||||
/// Configuration
|
/// Configuration
|
||||||
max_metrics_per_host: usize,
|
max_metrics_per_host: usize,
|
||||||
history_retention: Duration,
|
history_retention: Duration,
|
||||||
@ -21,89 +31,101 @@ pub struct MetricStore {
|
|||||||
impl MetricStore {
|
impl MetricStore {
|
||||||
pub fn new(max_metrics_per_host: usize, history_retention_hours: u64) -> Self {
|
pub fn new(max_metrics_per_host: usize, history_retention_hours: u64) -> Self {
|
||||||
Self {
|
Self {
|
||||||
current_metrics: HashMap::new(),
|
current_agent_data: HashMap::new(),
|
||||||
historical_metrics: HashMap::new(),
|
historical_metrics: HashMap::new(),
|
||||||
last_update: HashMap::new(),
|
last_heartbeat: HashMap::new(),
|
||||||
|
zmq_stats: HashMap::new(),
|
||||||
max_metrics_per_host,
|
max_metrics_per_host,
|
||||||
history_retention: Duration::from_secs(history_retention_hours * 3600),
|
history_retention: Duration::from_secs(history_retention_hours * 3600),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Update metrics for a specific host
|
|
||||||
pub fn update_metrics(&mut self, hostname: &str, metrics: Vec<Metric>) {
|
/// Store structured agent data directly
|
||||||
|
pub fn store_agent_data(&mut self, agent_data: AgentData) {
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
|
let hostname = agent_data.hostname.clone();
|
||||||
|
|
||||||
debug!("Updating {} metrics for host {}", metrics.len(), hostname);
|
debug!("Storing structured data for host {}", hostname);
|
||||||
|
|
||||||
// Get or create host entry
|
// Store the structured data directly
|
||||||
let host_metrics = self
|
self.current_agent_data.insert(hostname.clone(), agent_data);
|
||||||
.current_metrics
|
|
||||||
.entry(hostname.to_string())
|
|
||||||
.or_insert_with(HashMap::new);
|
|
||||||
|
|
||||||
// Get or create historical entry
|
// Update heartbeat timestamp
|
||||||
|
self.last_heartbeat.insert(hostname.clone(), now);
|
||||||
|
debug!("Updated heartbeat for host {}", hostname);
|
||||||
|
|
||||||
|
// Update ZMQ stats
|
||||||
|
let stats = self.zmq_stats.entry(hostname.clone()).or_insert(ZmqStats {
|
||||||
|
packets_received: 0,
|
||||||
|
last_packet_time: now,
|
||||||
|
last_packet_age_secs: 0.0,
|
||||||
|
});
|
||||||
|
stats.packets_received += 1;
|
||||||
|
stats.last_packet_time = now;
|
||||||
|
stats.last_packet_age_secs = 0.0; // Just received
|
||||||
|
|
||||||
|
// Add to history
|
||||||
let host_history = self
|
let host_history = self
|
||||||
.historical_metrics
|
.historical_metrics
|
||||||
.entry(hostname.to_string())
|
.entry(hostname.clone())
|
||||||
.or_insert_with(Vec::new);
|
.or_insert_with(Vec::new);
|
||||||
|
host_history.push(MetricDataPoint { received_at: now });
|
||||||
|
|
||||||
// Update current metrics and add to history
|
// Cleanup old data
|
||||||
for metric in metrics {
|
self.cleanup_host_data(&hostname);
|
||||||
let metric_name = metric.name.clone();
|
|
||||||
|
|
||||||
// Store current metric
|
info!("Stored structured data for {}", hostname);
|
||||||
host_metrics.insert(metric_name.clone(), metric.clone());
|
|
||||||
|
|
||||||
// Add to history
|
|
||||||
host_history.push(MetricDataPoint { received_at: now });
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update last update timestamp
|
|
||||||
self.last_update.insert(hostname.to_string(), now);
|
|
||||||
|
|
||||||
// Get metrics count before cleanup
|
|
||||||
let metrics_count = host_metrics.len();
|
|
||||||
|
|
||||||
// Cleanup old history and enforce limits
|
|
||||||
self.cleanup_host_data(hostname);
|
|
||||||
|
|
||||||
info!(
|
|
||||||
"Updated metrics for {}: {} current metrics",
|
|
||||||
hostname, metrics_count
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get current metric for a specific host
|
|
||||||
pub fn get_metric(&self, hostname: &str, metric_name: &str) -> Option<&Metric> {
|
|
||||||
self.current_metrics.get(hostname)?.get(metric_name)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Get all current metrics for a host as a vector
|
|
||||||
pub fn get_metrics_for_host(&self, hostname: &str) -> Vec<&Metric> {
|
|
||||||
if let Some(metrics_map) = self.current_metrics.get(hostname) {
|
/// Get current structured data for a host
|
||||||
metrics_map.values().collect()
|
pub fn get_agent_data(&self, hostname: &str) -> Option<&AgentData> {
|
||||||
} else {
|
self.current_agent_data.get(hostname)
|
||||||
Vec::new()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get connected hosts (hosts with recent updates)
|
/// Get connected hosts (hosts with recent heartbeats)
|
||||||
pub fn get_connected_hosts(&self, timeout: Duration) -> Vec<String> {
|
pub fn get_connected_hosts(&self, timeout: Duration) -> Vec<String> {
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
|
|
||||||
self.last_update
|
self.last_heartbeat
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|(hostname, &last_update)| {
|
.filter_map(|(hostname, &last_heartbeat)| {
|
||||||
if now.duration_since(last_update) <= timeout {
|
if now.duration_since(last_heartbeat) <= timeout {
|
||||||
Some(hostname.clone())
|
Some(hostname.clone())
|
||||||
} else {
|
} else {
|
||||||
|
debug!("Host {} considered offline - last heartbeat was {:?} ago",
|
||||||
|
hostname, now.duration_since(last_heartbeat));
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Clean up data for offline hosts
|
||||||
|
pub fn cleanup_offline_hosts(&mut self, timeout: Duration) {
|
||||||
|
let now = Instant::now();
|
||||||
|
let mut hosts_to_cleanup = Vec::new();
|
||||||
|
|
||||||
|
// Find hosts that are offline (no recent heartbeat)
|
||||||
|
for (hostname, &last_heartbeat) in &self.last_heartbeat {
|
||||||
|
if now.duration_since(last_heartbeat) > timeout {
|
||||||
|
hosts_to_cleanup.push(hostname.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear data for offline hosts
|
||||||
|
for hostname in hosts_to_cleanup {
|
||||||
|
if let Some(_agent_data) = self.current_agent_data.remove(&hostname) {
|
||||||
|
info!("Cleared structured data for offline host: {}", hostname);
|
||||||
|
}
|
||||||
|
// Keep heartbeat timestamp for reconnection detection
|
||||||
|
// Don't remove from last_heartbeat to track when host was last seen
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Cleanup old data and enforce limits
|
/// Cleanup old data and enforce limits
|
||||||
fn cleanup_host_data(&mut self, hostname: &str) {
|
fn cleanup_host_data(&mut self, hostname: &str) {
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
@ -124,4 +146,48 @@ impl MetricStore {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get agent versions from all hosts for cross-host comparison
|
||||||
|
pub fn get_agent_versions(&self) -> HashMap<String, String> {
|
||||||
|
let mut versions = HashMap::new();
|
||||||
|
|
||||||
|
for (hostname, agent_data) in &self.current_agent_data {
|
||||||
|
versions.insert(hostname.clone(), agent_data.agent_version.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
versions
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check for agent version mismatches across hosts
|
||||||
|
pub fn get_version_mismatches(&self) -> Option<(String, Vec<String>)> {
|
||||||
|
let versions = self.get_agent_versions();
|
||||||
|
|
||||||
|
if versions.len() < 2 {
|
||||||
|
return None; // Need at least 2 hosts to compare
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the most common version (assume it's the "current" version)
|
||||||
|
let mut version_counts = HashMap::new();
|
||||||
|
for version in versions.values() {
|
||||||
|
*version_counts.entry(version.clone()).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let most_common_version = version_counts
|
||||||
|
.iter()
|
||||||
|
.max_by_key(|(_, count)| *count)
|
||||||
|
.map(|(version, _)| version.clone())?;
|
||||||
|
|
||||||
|
// Find hosts with different versions
|
||||||
|
let outdated_hosts: Vec<String> = versions
|
||||||
|
.iter()
|
||||||
|
.filter(|(_, version)| *version != &most_common_version)
|
||||||
|
.map(|(hostname, _)| hostname.clone())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if outdated_hosts.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some((most_common_version, outdated_hosts))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use crossterm::event::{Event, KeyCode, KeyModifiers};
|
use crossterm::event::{Event, KeyCode};
|
||||||
use ratatui::{
|
use ratatui::{
|
||||||
layout::{Constraint, Direction, Layout, Rect},
|
layout::{Constraint, Direction, Layout, Rect},
|
||||||
style::Style,
|
style::Style,
|
||||||
@ -7,56 +7,23 @@ use ratatui::{
|
|||||||
Frame,
|
Frame,
|
||||||
};
|
};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::Instant;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
use wake_on_lan::MagicPacket;
|
||||||
|
|
||||||
pub mod theme;
|
pub mod theme;
|
||||||
pub mod widgets;
|
pub mod widgets;
|
||||||
|
|
||||||
|
use crate::config::DashboardConfig;
|
||||||
use crate::metrics::MetricStore;
|
use crate::metrics::MetricStore;
|
||||||
use cm_dashboard_shared::{Metric, Status};
|
use cm_dashboard_shared::Status;
|
||||||
use theme::{Components, Layout as ThemeLayout, Theme, Typography};
|
use theme::{Components, Layout as ThemeLayout, Theme};
|
||||||
use widgets::{BackupWidget, ServicesWidget, SystemWidget, Widget};
|
use widgets::{ServicesWidget, SystemWidget, Widget};
|
||||||
|
|
||||||
/// Commands that can be triggered from the UI
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum UiCommand {
|
|
||||||
ServiceRestart { hostname: String, service_name: String },
|
|
||||||
ServiceStart { hostname: String, service_name: String },
|
|
||||||
ServiceStop { hostname: String, service_name: String },
|
|
||||||
SystemRebuild { hostname: String },
|
|
||||||
TriggerBackup { hostname: String },
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Command execution status for visual feedback
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum CommandStatus {
|
|
||||||
/// Command is executing
|
|
||||||
InProgress { command_type: CommandType, target: String, start_time: std::time::Instant },
|
|
||||||
/// Command completed successfully
|
|
||||||
Success { command_type: CommandType, completed_at: std::time::Instant },
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Types of commands for status tracking
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum CommandType {
|
|
||||||
ServiceRestart,
|
|
||||||
ServiceStart,
|
|
||||||
ServiceStop,
|
|
||||||
SystemRebuild,
|
|
||||||
BackupTrigger,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Panel types for focus management
|
/// Panel types for focus management
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
||||||
pub enum PanelType {
|
|
||||||
System,
|
|
||||||
Services,
|
|
||||||
Backup,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PanelType {
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Widget states for a specific host
|
/// Widget states for a specific host
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@ -65,16 +32,8 @@ pub struct HostWidgets {
|
|||||||
pub system_widget: SystemWidget,
|
pub system_widget: SystemWidget,
|
||||||
/// Services widget state
|
/// Services widget state
|
||||||
pub services_widget: ServicesWidget,
|
pub services_widget: ServicesWidget,
|
||||||
/// Backup widget state
|
|
||||||
pub backup_widget: BackupWidget,
|
|
||||||
/// Scroll offsets for each panel
|
|
||||||
pub system_scroll_offset: usize,
|
|
||||||
pub services_scroll_offset: usize,
|
|
||||||
pub backup_scroll_offset: usize,
|
|
||||||
/// Last update time for this host
|
/// Last update time for this host
|
||||||
pub last_update: Option<Instant>,
|
pub last_update: Option<Instant>,
|
||||||
/// Active command status for visual feedback
|
|
||||||
pub command_status: Option<CommandStatus>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl HostWidgets {
|
impl HostWidgets {
|
||||||
@ -82,122 +41,86 @@ impl HostWidgets {
|
|||||||
Self {
|
Self {
|
||||||
system_widget: SystemWidget::new(),
|
system_widget: SystemWidget::new(),
|
||||||
services_widget: ServicesWidget::new(),
|
services_widget: ServicesWidget::new(),
|
||||||
backup_widget: BackupWidget::new(),
|
|
||||||
system_scroll_offset: 0,
|
|
||||||
services_scroll_offset: 0,
|
|
||||||
backup_scroll_offset: 0,
|
|
||||||
last_update: None,
|
last_update: None,
|
||||||
command_status: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Popup menu state
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct PopupMenu {
|
||||||
|
pub service_name: String,
|
||||||
|
pub x: u16,
|
||||||
|
pub y: u16,
|
||||||
|
pub selected_index: usize,
|
||||||
|
}
|
||||||
|
|
||||||
/// Main TUI application
|
/// Main TUI application
|
||||||
pub struct TuiApp {
|
pub struct TuiApp {
|
||||||
/// Widget states per host (hostname -> HostWidgets)
|
/// Widget states per host (hostname -> HostWidgets)
|
||||||
host_widgets: HashMap<String, HostWidgets>,
|
host_widgets: HashMap<String, HostWidgets>,
|
||||||
/// Current active host
|
/// Current active host
|
||||||
current_host: Option<String>,
|
pub current_host: Option<String>,
|
||||||
/// Available hosts
|
/// Available hosts
|
||||||
available_hosts: Vec<String>,
|
available_hosts: Vec<String>,
|
||||||
/// Host index for navigation
|
/// Host index for navigation
|
||||||
host_index: usize,
|
host_index: usize,
|
||||||
/// Currently focused panel
|
|
||||||
focused_panel: PanelType,
|
|
||||||
/// Should quit application
|
/// Should quit application
|
||||||
should_quit: bool,
|
should_quit: bool,
|
||||||
/// Track if user manually navigated away from localhost
|
/// Track if user manually navigated away from localhost
|
||||||
user_navigated_away: bool,
|
user_navigated_away: bool,
|
||||||
|
/// Dashboard configuration
|
||||||
|
config: DashboardConfig,
|
||||||
|
/// Cached localhost hostname to avoid repeated system calls
|
||||||
|
localhost: String,
|
||||||
|
/// Active popup menu (if any)
|
||||||
|
pub popup_menu: Option<PopupMenu>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TuiApp {
|
impl TuiApp {
|
||||||
pub fn new() -> Self {
|
pub fn new(config: DashboardConfig) -> Self {
|
||||||
Self {
|
let localhost = gethostname::gethostname().to_string_lossy().to_string();
|
||||||
|
let mut app = Self {
|
||||||
host_widgets: HashMap::new(),
|
host_widgets: HashMap::new(),
|
||||||
current_host: None,
|
current_host: None,
|
||||||
available_hosts: Vec::new(),
|
available_hosts: config.hosts.keys().cloned().collect(),
|
||||||
host_index: 0,
|
host_index: 0,
|
||||||
focused_panel: PanelType::System, // Start with System panel focused
|
|
||||||
should_quit: false,
|
should_quit: false,
|
||||||
user_navigated_away: false,
|
user_navigated_away: false,
|
||||||
|
config,
|
||||||
|
localhost,
|
||||||
|
popup_menu: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Sort predefined hosts
|
||||||
|
app.available_hosts.sort();
|
||||||
|
|
||||||
|
// Initialize with first host if available
|
||||||
|
if !app.available_hosts.is_empty() {
|
||||||
|
app.current_host = Some(app.available_hosts[0].clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
app
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get or create host widgets for the given hostname
|
/// Get or create host widgets for the given hostname
|
||||||
fn get_or_create_host_widgets(&mut self, hostname: &str) -> &mut HostWidgets {
|
pub fn get_or_create_host_widgets(&mut self, hostname: &str) -> &mut HostWidgets {
|
||||||
self.host_widgets
|
self.host_widgets
|
||||||
.entry(hostname.to_string())
|
.entry(hostname.to_string())
|
||||||
.or_insert_with(HostWidgets::new)
|
.or_insert_with(HostWidgets::new)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Update widgets with metrics from store (only for current host)
|
/// Update widgets with structured data from store (only for current host)
|
||||||
pub fn update_metrics(&mut self, metric_store: &MetricStore) {
|
pub fn update_metrics(&mut self, metric_store: &mut MetricStore) {
|
||||||
// Check for command timeouts first
|
|
||||||
self.check_command_timeouts();
|
|
||||||
|
|
||||||
// Check for rebuild completion by agent hash change
|
|
||||||
self.check_rebuild_completion(metric_store);
|
|
||||||
|
|
||||||
if let Some(hostname) = self.current_host.clone() {
|
if let Some(hostname) = self.current_host.clone() {
|
||||||
// Only update widgets if we have metrics for this host
|
// Get structured data for this host
|
||||||
let all_metrics = metric_store.get_metrics_for_host(&hostname);
|
if let Some(agent_data) = metric_store.get_agent_data(&hostname) {
|
||||||
if !all_metrics.is_empty() {
|
|
||||||
// Get metrics first while hostname is borrowed
|
|
||||||
let cpu_metrics: Vec<&Metric> = all_metrics
|
|
||||||
.iter()
|
|
||||||
.filter(|m| {
|
|
||||||
m.name.starts_with("cpu_")
|
|
||||||
|| m.name.contains("c_state_")
|
|
||||||
|| m.name.starts_with("process_top_")
|
|
||||||
})
|
|
||||||
.copied()
|
|
||||||
.collect();
|
|
||||||
let memory_metrics: Vec<&Metric> = all_metrics
|
|
||||||
.iter()
|
|
||||||
.filter(|m| m.name.starts_with("memory_") || m.name.starts_with("disk_tmp_"))
|
|
||||||
.copied()
|
|
||||||
.collect();
|
|
||||||
let service_metrics: Vec<&Metric> = all_metrics
|
|
||||||
.iter()
|
|
||||||
.filter(|m| m.name.starts_with("service_"))
|
|
||||||
.copied()
|
|
||||||
.collect();
|
|
||||||
let all_backup_metrics: Vec<&Metric> = all_metrics
|
|
||||||
.iter()
|
|
||||||
.filter(|m| m.name.starts_with("backup_"))
|
|
||||||
.copied()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// Now get host widgets and update them
|
|
||||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
||||||
|
|
||||||
// Collect all system metrics (CPU, memory, NixOS, disk/storage)
|
// Update all widgets with structured data directly
|
||||||
let mut system_metrics = cpu_metrics;
|
host_widgets.system_widget.update_from_agent_data(agent_data);
|
||||||
system_metrics.extend(memory_metrics);
|
host_widgets.services_widget.update_from_agent_data(agent_data);
|
||||||
|
|
||||||
// Add NixOS metrics - using exact matching for build display fix
|
|
||||||
let nixos_metrics: Vec<&Metric> = all_metrics
|
|
||||||
.iter()
|
|
||||||
.filter(|m| m.name == "system_nixos_build" || m.name == "system_active_users" || m.name == "system_agent_hash")
|
|
||||||
.copied()
|
|
||||||
.collect();
|
|
||||||
system_metrics.extend(nixos_metrics);
|
|
||||||
|
|
||||||
// Add disk/storage metrics
|
|
||||||
let disk_metrics: Vec<&Metric> = all_metrics
|
|
||||||
.iter()
|
|
||||||
.filter(|m| m.name.starts_with("disk_"))
|
|
||||||
.copied()
|
|
||||||
.collect();
|
|
||||||
system_metrics.extend(disk_metrics);
|
|
||||||
|
|
||||||
host_widgets.system_widget.update_from_metrics(&system_metrics);
|
|
||||||
host_widgets
|
|
||||||
.services_widget
|
|
||||||
.update_from_metrics(&service_metrics);
|
|
||||||
host_widgets
|
|
||||||
.backup_widget
|
|
||||||
.update_from_metrics(&all_backup_metrics);
|
|
||||||
|
|
||||||
host_widgets.last_update = Some(Instant::now());
|
host_widgets.last_update = Some(Instant::now());
|
||||||
}
|
}
|
||||||
@ -205,30 +128,28 @@ impl TuiApp {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Update available hosts with localhost prioritization
|
/// Update available hosts with localhost prioritization
|
||||||
pub fn update_hosts(&mut self, hosts: Vec<String>) {
|
pub fn update_hosts(&mut self, discovered_hosts: Vec<String>) {
|
||||||
// Sort hosts alphabetically
|
// Start with configured hosts (always visible)
|
||||||
let mut sorted_hosts = hosts.clone();
|
let mut all_hosts: Vec<String> = self.config.hosts.keys().cloned().collect();
|
||||||
|
|
||||||
// Keep hosts that are undergoing SystemRebuild even if they're offline
|
// Add any discovered hosts that aren't already configured
|
||||||
for (hostname, host_widgets) in &self.host_widgets {
|
for host in discovered_hosts {
|
||||||
if let Some(CommandStatus::InProgress { command_type: CommandType::SystemRebuild, .. }) = &host_widgets.command_status {
|
if !all_hosts.contains(&host) {
|
||||||
if !sorted_hosts.contains(hostname) {
|
all_hosts.push(host);
|
||||||
sorted_hosts.push(hostname.clone());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sorted_hosts.sort();
|
|
||||||
self.available_hosts = sorted_hosts;
|
all_hosts.sort();
|
||||||
|
self.available_hosts = all_hosts;
|
||||||
|
|
||||||
// Get the current hostname (localhost) for auto-selection
|
// Get the current hostname (localhost) for auto-selection
|
||||||
let localhost = gethostname::gethostname().to_string_lossy().to_string();
|
|
||||||
if !self.available_hosts.is_empty() {
|
if !self.available_hosts.is_empty() {
|
||||||
if self.available_hosts.contains(&localhost) && !self.user_navigated_away {
|
if self.available_hosts.contains(&self.localhost) && !self.user_navigated_away {
|
||||||
// Localhost is available and user hasn't navigated away - switch to it
|
// Localhost is available and user hasn't navigated away - switch to it
|
||||||
self.current_host = Some(localhost.clone());
|
self.current_host = Some(self.localhost.clone());
|
||||||
// Find the actual index of localhost in the sorted list
|
// Find the actual index of localhost in the sorted list
|
||||||
self.host_index = self.available_hosts.iter().position(|h| h == &localhost).unwrap_or(0);
|
self.host_index = self.available_hosts.iter().position(|h| h == &self.localhost).unwrap_or(0);
|
||||||
} else if self.current_host.is_none() {
|
} else if self.current_host.is_none() {
|
||||||
// No current host - select first available (which is localhost if available)
|
// No current host - select first available (which is localhost if available)
|
||||||
self.current_host = Some(self.available_hosts[0].clone());
|
self.current_host = Some(self.available_hosts[0].clone());
|
||||||
@ -248,8 +169,16 @@ impl TuiApp {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Handle keyboard input
|
/// Handle keyboard input
|
||||||
pub fn handle_input(&mut self, event: Event) -> Result<Option<UiCommand>> {
|
pub fn handle_input(&mut self, event: Event) -> Result<()> {
|
||||||
if let Event::Key(key) = event {
|
if let Event::Key(key) = event {
|
||||||
|
// Close popup on Escape
|
||||||
|
if matches!(key.code, KeyCode::Esc) {
|
||||||
|
if self.popup_menu.is_some() {
|
||||||
|
self.popup_menu = None;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
match key.code {
|
match key.code {
|
||||||
KeyCode::Char('q') => {
|
KeyCode::Char('q') => {
|
||||||
self.should_quit = true;
|
self.should_quit = true;
|
||||||
@ -261,78 +190,214 @@ impl TuiApp {
|
|||||||
self.navigate_host(1);
|
self.navigate_host(1);
|
||||||
}
|
}
|
||||||
KeyCode::Char('r') => {
|
KeyCode::Char('r') => {
|
||||||
match self.focused_panel {
|
// System rebuild command - works on any panel for current host
|
||||||
PanelType::System => {
|
if let Some(hostname) = self.current_host.clone() {
|
||||||
// System rebuild command
|
let connection_ip = self.get_connection_ip(&hostname);
|
||||||
if let Some(hostname) = self.current_host.clone() {
|
// Create command that shows logo, rebuilds, and waits for user input
|
||||||
self.start_command(&hostname, CommandType::SystemRebuild, hostname.clone());
|
let logo_and_rebuild = format!(
|
||||||
return Ok(Some(UiCommand::SystemRebuild { hostname }));
|
"echo 'Rebuilding system: {} ({})' && ssh -tt {}@{} \"bash -ic '{}'\"",
|
||||||
}
|
hostname,
|
||||||
}
|
connection_ip,
|
||||||
PanelType::Services => {
|
self.config.ssh.rebuild_user,
|
||||||
// Service restart command
|
connection_ip,
|
||||||
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
|
self.config.ssh.rebuild_cmd
|
||||||
self.start_command(&hostname, CommandType::ServiceRestart, service_name.clone());
|
);
|
||||||
return Ok(Some(UiCommand::ServiceRestart { hostname, service_name }));
|
|
||||||
}
|
std::process::Command::new("tmux")
|
||||||
}
|
.arg("split-window")
|
||||||
_ => {
|
.arg("-v")
|
||||||
info!("Manual refresh requested");
|
.arg("-p")
|
||||||
}
|
.arg("30")
|
||||||
|
.arg(&logo_and_rebuild)
|
||||||
|
.spawn()
|
||||||
|
.ok(); // Ignore errors, tmux will handle them
|
||||||
|
}
|
||||||
|
}
|
||||||
|
KeyCode::Char('B') => {
|
||||||
|
// Backup command - works on any panel for current host
|
||||||
|
if let Some(hostname) = self.current_host.clone() {
|
||||||
|
let connection_ip = self.get_connection_ip(&hostname);
|
||||||
|
// Create command that shows logo, runs backup, and waits for user input
|
||||||
|
let logo_and_backup = format!(
|
||||||
|
"echo 'Running backup: {} ({})' && ssh -tt {}@{} \"bash -ic '{}'\"",
|
||||||
|
hostname,
|
||||||
|
connection_ip,
|
||||||
|
self.config.ssh.rebuild_user,
|
||||||
|
connection_ip,
|
||||||
|
format!("{} start borgbackup", self.config.ssh.service_manage_cmd)
|
||||||
|
);
|
||||||
|
|
||||||
|
std::process::Command::new("tmux")
|
||||||
|
.arg("split-window")
|
||||||
|
.arg("-v")
|
||||||
|
.arg("-p")
|
||||||
|
.arg("30")
|
||||||
|
.arg(&logo_and_backup)
|
||||||
|
.spawn()
|
||||||
|
.ok(); // Ignore errors, tmux will handle them
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
KeyCode::Char('s') => {
|
KeyCode::Char('s') => {
|
||||||
if self.focused_panel == PanelType::Services {
|
// Service start command via SSH with progress display
|
||||||
// Service start command
|
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
|
||||||
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
|
let connection_ip = self.get_connection_ip(&hostname);
|
||||||
self.start_command(&hostname, CommandType::ServiceStart, service_name.clone());
|
let service_start_command = format!(
|
||||||
return Ok(Some(UiCommand::ServiceStart { hostname, service_name }));
|
"echo 'Starting service: {} on {}' && ssh -tt {}@{} \"bash -ic '{} start {}'\"",
|
||||||
}
|
service_name,
|
||||||
|
hostname,
|
||||||
|
self.config.ssh.rebuild_user,
|
||||||
|
connection_ip,
|
||||||
|
self.config.ssh.service_manage_cmd,
|
||||||
|
service_name
|
||||||
|
);
|
||||||
|
|
||||||
|
std::process::Command::new("tmux")
|
||||||
|
.arg("split-window")
|
||||||
|
.arg("-v")
|
||||||
|
.arg("-p")
|
||||||
|
.arg("30")
|
||||||
|
.arg(&service_start_command)
|
||||||
|
.spawn()
|
||||||
|
.ok(); // Ignore errors, tmux will handle them
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
KeyCode::Char('S') => {
|
KeyCode::Char('S') => {
|
||||||
if self.focused_panel == PanelType::Services {
|
// Service stop command via SSH with progress display
|
||||||
// Service stop command
|
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
|
||||||
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
|
let connection_ip = self.get_connection_ip(&hostname);
|
||||||
self.start_command(&hostname, CommandType::ServiceStop, service_name.clone());
|
let service_stop_command = format!(
|
||||||
return Ok(Some(UiCommand::ServiceStop { hostname, service_name }));
|
"echo 'Stopping service: {} on {}' && ssh -tt {}@{} \"bash -ic '{} stop {}'\"",
|
||||||
|
service_name,
|
||||||
|
hostname,
|
||||||
|
self.config.ssh.rebuild_user,
|
||||||
|
connection_ip,
|
||||||
|
self.config.ssh.service_manage_cmd,
|
||||||
|
service_name
|
||||||
|
);
|
||||||
|
|
||||||
|
std::process::Command::new("tmux")
|
||||||
|
.arg("split-window")
|
||||||
|
.arg("-v")
|
||||||
|
.arg("-p")
|
||||||
|
.arg("30")
|
||||||
|
.arg(&service_stop_command)
|
||||||
|
.spawn()
|
||||||
|
.ok(); // Ignore errors, tmux will handle them
|
||||||
|
}
|
||||||
|
}
|
||||||
|
KeyCode::Char('L') => {
|
||||||
|
// Show service logs via service-manage script in tmux split window
|
||||||
|
if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
|
||||||
|
let connection_ip = self.get_connection_ip(&hostname);
|
||||||
|
let logs_command = format!(
|
||||||
|
"ssh -tt {}@{} '{} logs {}'",
|
||||||
|
self.config.ssh.rebuild_user,
|
||||||
|
connection_ip,
|
||||||
|
self.config.ssh.service_manage_cmd,
|
||||||
|
service_name
|
||||||
|
);
|
||||||
|
|
||||||
|
std::process::Command::new("tmux")
|
||||||
|
.arg("split-window")
|
||||||
|
.arg("-v")
|
||||||
|
.arg("-p")
|
||||||
|
.arg("30")
|
||||||
|
.arg(&logs_command)
|
||||||
|
.spawn()
|
||||||
|
.ok(); // Ignore errors, tmux will handle them
|
||||||
|
}
|
||||||
|
}
|
||||||
|
KeyCode::Char('w') => {
|
||||||
|
// Wake on LAN for offline hosts
|
||||||
|
if let Some(hostname) = self.current_host.clone() {
|
||||||
|
// Check if host has MAC address configured
|
||||||
|
if let Some(host_details) = self.config.hosts.get(&hostname) {
|
||||||
|
if let Some(mac_address) = &host_details.mac_address {
|
||||||
|
// Parse MAC address and send WoL packet
|
||||||
|
let mac_bytes = Self::parse_mac_address(mac_address);
|
||||||
|
match mac_bytes {
|
||||||
|
Ok(mac) => {
|
||||||
|
match MagicPacket::new(&mac).send() {
|
||||||
|
Ok(_) => {
|
||||||
|
info!("WakeOnLAN packet sent successfully to {} ({})", hostname, mac_address);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::error!("Failed to send WakeOnLAN packet to {}: {}", hostname, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
tracing::error!("Invalid MAC address format for {}: {}", hostname, mac_address);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
KeyCode::Char('b') => {
|
KeyCode::Char('t') => {
|
||||||
if self.focused_panel == PanelType::Backup {
|
// Open SSH terminal session in tmux window
|
||||||
// Trigger backup
|
if let Some(hostname) = self.current_host.clone() {
|
||||||
if let Some(hostname) = self.current_host.clone() {
|
let connection_ip = self.get_connection_ip(&hostname);
|
||||||
self.start_command(&hostname, CommandType::BackupTrigger, hostname.clone());
|
let ssh_command = format!(
|
||||||
return Ok(Some(UiCommand::TriggerBackup { hostname }));
|
"echo 'Opening SSH terminal to: {}' && ssh -tt {}@{}",
|
||||||
}
|
hostname,
|
||||||
|
self.config.ssh.rebuild_user,
|
||||||
|
connection_ip
|
||||||
|
);
|
||||||
|
|
||||||
|
std::process::Command::new("tmux")
|
||||||
|
.arg("split-window")
|
||||||
|
.arg("-v")
|
||||||
|
.arg("-p")
|
||||||
|
.arg("30") // Use 30% like other commands
|
||||||
|
.arg(&ssh_command)
|
||||||
|
.spawn()
|
||||||
|
.ok(); // Ignore errors, tmux will handle them
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
KeyCode::Tab => {
|
KeyCode::Tab => {
|
||||||
if key.modifiers.contains(KeyModifiers::SHIFT) {
|
// Tab cycles to next host
|
||||||
// Shift+Tab cycles through panels
|
self.navigate_host(1);
|
||||||
self.next_panel();
|
}
|
||||||
} else {
|
KeyCode::Up | KeyCode::Char('k') => {
|
||||||
// Tab cycles to next host
|
// Move service selection up
|
||||||
self.navigate_host(1);
|
if let Some(hostname) = self.current_host.clone() {
|
||||||
|
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
||||||
|
host_widgets.services_widget.select_previous();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
KeyCode::BackTab => {
|
KeyCode::Down | KeyCode::Char('j') => {
|
||||||
// BackTab (Shift+Tab on some terminals) also cycles panels
|
// Move service selection down
|
||||||
self.next_panel();
|
if let Some(hostname) = self.current_host.clone() {
|
||||||
}
|
let total_services = {
|
||||||
KeyCode::Up => {
|
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
||||||
// Scroll up in focused panel
|
host_widgets.services_widget.get_total_services_count()
|
||||||
self.scroll_focused_panel(-1);
|
};
|
||||||
}
|
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
||||||
KeyCode::Down => {
|
host_widgets.services_widget.select_next(total_services);
|
||||||
// Scroll down in focused panel
|
}
|
||||||
self.scroll_focused_panel(1);
|
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(None)
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Switch to a specific host by name
|
||||||
|
pub fn switch_to_host(&mut self, hostname: &str) {
|
||||||
|
if let Some(index) = self.available_hosts.iter().position(|h| h == hostname) {
|
||||||
|
self.host_index = index;
|
||||||
|
self.current_host = Some(hostname.to_string());
|
||||||
|
|
||||||
|
// Check if user navigated away from localhost
|
||||||
|
if hostname != &self.localhost {
|
||||||
|
self.user_navigated_away = true;
|
||||||
|
} else {
|
||||||
|
self.user_navigated_away = false; // User navigated back to localhost
|
||||||
|
}
|
||||||
|
|
||||||
|
info!("Switched to host: {}", hostname);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Navigate between hosts
|
/// Navigate between hosts
|
||||||
@ -353,51 +418,20 @@ impl TuiApp {
|
|||||||
}
|
}
|
||||||
|
|
||||||
self.current_host = Some(self.available_hosts[self.host_index].clone());
|
self.current_host = Some(self.available_hosts[self.host_index].clone());
|
||||||
|
|
||||||
// Check if user navigated away from localhost
|
// Check if user navigated away from localhost
|
||||||
let localhost = gethostname::gethostname().to_string_lossy().to_string();
|
|
||||||
if let Some(ref current) = self.current_host {
|
if let Some(ref current) = self.current_host {
|
||||||
if current != &localhost {
|
if current != &self.localhost {
|
||||||
self.user_navigated_away = true;
|
self.user_navigated_away = true;
|
||||||
} else {
|
} else {
|
||||||
self.user_navigated_away = false; // User navigated back to localhost
|
self.user_navigated_away = false; // User navigated back to localhost
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
info!("Switched to host: {}", self.current_host.as_ref().unwrap());
|
info!("Switched to host: {}", self.current_host.as_ref().unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if a host is currently rebuilding
|
|
||||||
pub fn is_host_rebuilding(&self, hostname: &str) -> bool {
|
|
||||||
if let Some(host_widgets) = self.host_widgets.get(hostname) {
|
|
||||||
matches!(
|
|
||||||
&host_widgets.command_status,
|
|
||||||
Some(CommandStatus::InProgress { command_type: CommandType::SystemRebuild, .. })
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Switch to next panel (Shift+Tab) - only cycles through visible panels
|
|
||||||
pub fn next_panel(&mut self) {
|
|
||||||
let visible_panels = self.get_visible_panels();
|
|
||||||
if visible_panels.len() <= 1 {
|
|
||||||
return; // Can't switch if only one or no panels visible
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find current panel index in visible panels
|
|
||||||
if let Some(current_index) = visible_panels.iter().position(|&p| p == self.focused_panel) {
|
|
||||||
// Move to next visible panel
|
|
||||||
let next_index = (current_index + 1) % visible_panels.len();
|
|
||||||
self.focused_panel = visible_panels[next_index];
|
|
||||||
} else {
|
|
||||||
// Current panel not visible, switch to first visible panel
|
|
||||||
self.focused_panel = visible_panels[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
info!("Switched to panel: {:?}", self.focused_panel);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -411,153 +445,24 @@ impl TuiApp {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get the list of available hosts
|
||||||
|
pub fn get_available_hosts(&self) -> &Vec<String> {
|
||||||
|
&self.available_hosts
|
||||||
|
}
|
||||||
|
|
||||||
/// Should quit application
|
/// Should quit application
|
||||||
pub fn should_quit(&self) -> bool {
|
pub fn should_quit(&self) -> bool {
|
||||||
self.should_quit
|
self.should_quit
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Start command execution and track status for visual feedback
|
|
||||||
pub fn start_command(&mut self, hostname: &str, command_type: CommandType, target: String) {
|
|
||||||
if let Some(host_widgets) = self.host_widgets.get_mut(hostname) {
|
|
||||||
host_widgets.command_status = Some(CommandStatus::InProgress {
|
|
||||||
command_type,
|
|
||||||
target,
|
|
||||||
start_time: Instant::now(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Mark command as completed successfully
|
|
||||||
pub fn complete_command(&mut self, hostname: &str) {
|
|
||||||
if let Some(host_widgets) = self.host_widgets.get_mut(hostname) {
|
|
||||||
if let Some(CommandStatus::InProgress { command_type, .. }) = &host_widgets.command_status {
|
|
||||||
host_widgets.command_status = Some(CommandStatus::Success {
|
|
||||||
command_type: command_type.clone(),
|
|
||||||
completed_at: Instant::now(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Check for command timeouts and automatically clear them
|
|
||||||
pub fn check_command_timeouts(&mut self) {
|
|
||||||
let now = Instant::now();
|
|
||||||
let mut hosts_to_clear = Vec::new();
|
|
||||||
|
|
||||||
for (hostname, host_widgets) in &self.host_widgets {
|
|
||||||
if let Some(CommandStatus::InProgress { command_type, start_time, .. }) = &host_widgets.command_status {
|
|
||||||
let timeout_duration = match command_type {
|
|
||||||
CommandType::SystemRebuild => Duration::from_secs(300), // 5 minutes for rebuilds
|
|
||||||
_ => Duration::from_secs(30), // 30 seconds for service commands
|
|
||||||
};
|
|
||||||
|
|
||||||
if now.duration_since(*start_time) > timeout_duration {
|
|
||||||
hosts_to_clear.push(hostname.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Also clear success/failed status after display time
|
|
||||||
else if let Some(CommandStatus::Success { completed_at, .. }) = &host_widgets.command_status {
|
|
||||||
if now.duration_since(*completed_at) > Duration::from_secs(3) {
|
|
||||||
hosts_to_clear.push(hostname.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clear timed out commands
|
|
||||||
for hostname in hosts_to_clear {
|
|
||||||
if let Some(host_widgets) = self.host_widgets.get_mut(&hostname) {
|
|
||||||
host_widgets.command_status = None;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check for rebuild completion by detecting agent hash changes
|
|
||||||
pub fn check_rebuild_completion(&mut self, metric_store: &MetricStore) {
|
|
||||||
let mut hosts_to_complete = Vec::new();
|
|
||||||
|
|
||||||
for (hostname, host_widgets) in &self.host_widgets {
|
|
||||||
if let Some(CommandStatus::InProgress { command_type: CommandType::SystemRebuild, .. }) = &host_widgets.command_status {
|
|
||||||
// Check if agent hash has changed (indicating successful rebuild)
|
|
||||||
if let Some(agent_hash_metric) = metric_store.get_metric(hostname, "system_agent_hash") {
|
|
||||||
if let cm_dashboard_shared::MetricValue::String(current_hash) = &agent_hash_metric.value {
|
|
||||||
// Compare with stored hash (if we have one)
|
|
||||||
if let Some(stored_hash) = host_widgets.system_widget.get_agent_hash() {
|
|
||||||
if current_hash != stored_hash {
|
|
||||||
// Agent hash changed - rebuild completed successfully
|
|
||||||
hosts_to_complete.push(hostname.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mark rebuilds as completed
|
|
||||||
for hostname in hosts_to_complete {
|
|
||||||
self.complete_command(&hostname);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Scroll the focused panel up or down
|
|
||||||
pub fn scroll_focused_panel(&mut self, direction: i32) {
|
|
||||||
if let Some(hostname) = self.current_host.clone() {
|
|
||||||
let focused_panel = self.focused_panel; // Get the value before borrowing
|
|
||||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
|
||||||
|
|
||||||
match focused_panel {
|
|
||||||
PanelType::System => {
|
|
||||||
if direction > 0 {
|
|
||||||
host_widgets.system_scroll_offset = host_widgets.system_scroll_offset.saturating_add(1);
|
|
||||||
} else {
|
|
||||||
host_widgets.system_scroll_offset = host_widgets.system_scroll_offset.saturating_sub(1);
|
|
||||||
}
|
|
||||||
info!("System panel scroll offset: {}", host_widgets.system_scroll_offset);
|
|
||||||
}
|
|
||||||
PanelType::Services => {
|
|
||||||
// For services panel, Up/Down moves selection cursor, not scroll
|
|
||||||
let total_services = host_widgets.services_widget.get_total_services_count();
|
|
||||||
|
|
||||||
if direction > 0 {
|
|
||||||
host_widgets.services_widget.select_next(total_services);
|
|
||||||
info!("Services selection moved down");
|
|
||||||
} else {
|
|
||||||
host_widgets.services_widget.select_previous();
|
|
||||||
info!("Services selection moved up");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PanelType::Backup => {
|
|
||||||
if direction > 0 {
|
|
||||||
host_widgets.backup_scroll_offset = host_widgets.backup_scroll_offset.saturating_add(1);
|
|
||||||
} else {
|
|
||||||
host_widgets.backup_scroll_offset = host_widgets.backup_scroll_offset.saturating_sub(1);
|
|
||||||
}
|
|
||||||
info!("Backup panel scroll offset: {}", host_widgets.backup_scroll_offset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Get list of currently visible panels
|
|
||||||
fn get_visible_panels(&self) -> Vec<PanelType> {
|
|
||||||
let mut visible_panels = vec![PanelType::System, PanelType::Services];
|
|
||||||
|
|
||||||
// Check if backup panel should be shown
|
|
||||||
if let Some(hostname) = &self.current_host {
|
|
||||||
if let Some(host_widgets) = self.host_widgets.get(hostname) {
|
|
||||||
if host_widgets.backup_widget.has_data() {
|
|
||||||
visible_panels.push(PanelType::Backup);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
visible_panels
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Render the dashboard (real btop-style multi-panel layout)
|
/// Render the dashboard (real btop-style multi-panel layout)
|
||||||
pub fn render(&mut self, frame: &mut Frame, metric_store: &MetricStore) {
|
pub fn render(&mut self, frame: &mut Frame, metric_store: &MetricStore) -> (Rect, Rect, Rect) {
|
||||||
let size = frame.size();
|
let size = frame.size();
|
||||||
|
|
||||||
// Clear background to true black like btop
|
// Clear background to true black like btop
|
||||||
@ -586,56 +491,54 @@ impl TuiApp {
|
|||||||
])
|
])
|
||||||
.split(main_chunks[1]); // main_chunks[1] is now the content area (between title and statusbar)
|
.split(main_chunks[1]); // main_chunks[1] is now the content area (between title and statusbar)
|
||||||
|
|
||||||
// Check if backup panel should be shown
|
// Check if current host is offline
|
||||||
let show_backup = if let Some(hostname) = self.current_host.clone() {
|
let current_host_offline = if let Some(hostname) = self.current_host.clone() {
|
||||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
self.calculate_host_status(&hostname, metric_store) == Status::Offline
|
||||||
host_widgets.backup_widget.has_data()
|
|
||||||
} else {
|
} else {
|
||||||
false
|
true // No host selected is considered offline
|
||||||
};
|
};
|
||||||
|
|
||||||
// Left side: dynamic layout based on backup data availability
|
// If host is offline, render wake-up message instead of panels
|
||||||
let left_chunks = if show_backup {
|
if current_host_offline {
|
||||||
// Show both system and backup panels
|
self.render_offline_host_message(frame, main_chunks[1]);
|
||||||
ratatui::layout::Layout::default()
|
self.render_btop_title(frame, main_chunks[0], metric_store);
|
||||||
.direction(Direction::Vertical)
|
self.render_statusbar(frame, main_chunks[2], metric_store);
|
||||||
.constraints([
|
return (main_chunks[0], Rect::default(), Rect::default()); // Return title area and empty areas when offline
|
||||||
Constraint::Percentage(ThemeLayout::SYSTEM_PANEL_HEIGHT), // System section
|
}
|
||||||
Constraint::Percentage(ThemeLayout::BACKUP_PANEL_HEIGHT), // Backup section
|
|
||||||
])
|
// Left side: system panel only (full height)
|
||||||
.split(content_chunks[0])
|
let left_chunks = ratatui::layout::Layout::default()
|
||||||
} else {
|
.direction(Direction::Vertical)
|
||||||
// Show only system panel (full height)
|
.constraints([Constraint::Percentage(100)]) // System section takes full height
|
||||||
ratatui::layout::Layout::default()
|
.split(content_chunks[0]);
|
||||||
.direction(Direction::Vertical)
|
|
||||||
.constraints([Constraint::Percentage(100)]) // System section takes full height
|
|
||||||
.split(content_chunks[0])
|
|
||||||
};
|
|
||||||
|
|
||||||
// Render title bar
|
// Render title bar
|
||||||
self.render_btop_title(frame, main_chunks[0], metric_store);
|
self.render_btop_title(frame, main_chunks[0], metric_store);
|
||||||
|
|
||||||
// Render new panel layout
|
// Render system panel
|
||||||
self.render_system_panel(frame, left_chunks[0], metric_store);
|
let system_area = left_chunks[0];
|
||||||
if show_backup && left_chunks.len() > 1 {
|
self.render_system_panel(frame, system_area, metric_store);
|
||||||
self.render_backup_panel(frame, left_chunks[1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Render services widget for current host
|
// Render services widget for current host
|
||||||
|
let services_area = content_chunks[1];
|
||||||
if let Some(hostname) = self.current_host.clone() {
|
if let Some(hostname) = self.current_host.clone() {
|
||||||
let is_focused = self.focused_panel == PanelType::Services;
|
let is_focused = true; // Always show service selection
|
||||||
let (scroll_offset, command_status) = {
|
|
||||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
|
||||||
(host_widgets.services_scroll_offset, host_widgets.command_status.clone())
|
|
||||||
};
|
|
||||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
||||||
host_widgets
|
host_widgets
|
||||||
.services_widget
|
.services_widget
|
||||||
.render_with_command_status(frame, content_chunks[1], is_focused, scroll_offset, command_status.as_ref()); // Services takes full right side
|
.render(frame, services_area, is_focused); // Services takes full right side
|
||||||
}
|
}
|
||||||
|
|
||||||
// Render statusbar at the bottom
|
// Render statusbar at the bottom
|
||||||
self.render_statusbar(frame, main_chunks[2]); // main_chunks[2] is the statusbar area
|
self.render_statusbar(frame, main_chunks[2], metric_store);
|
||||||
|
|
||||||
|
// Render popup menu on top of everything if active
|
||||||
|
if let Some(ref popup) = self.popup_menu {
|
||||||
|
self.render_popup_menu(frame, popup);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return all areas for mouse event handling
|
||||||
|
(main_chunks[0], system_area, services_area)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Render btop-style minimal title with host status colors
|
/// Render btop-style minimal title with host status colors
|
||||||
@ -646,193 +549,359 @@ impl TuiApp {
|
|||||||
|
|
||||||
if self.available_hosts.is_empty() {
|
if self.available_hosts.is_empty() {
|
||||||
let title_text = "cm-dashboard • no hosts discovered";
|
let title_text = "cm-dashboard • no hosts discovered";
|
||||||
let title = Paragraph::new(title_text).style(Typography::title());
|
let title = Paragraph::new(title_text)
|
||||||
|
.style(Style::default().fg(Theme::background()).bg(Theme::status_color(Status::Unknown)));
|
||||||
frame.render_widget(title, area);
|
frame.render_widget(title, area);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create spans for each host with status indicators
|
// Calculate worst-case status across all hosts (excluding offline)
|
||||||
let mut spans = vec![Span::styled("cm-dashboard • ", Typography::title())];
|
let mut worst_status = Status::Ok;
|
||||||
|
for host in &self.available_hosts {
|
||||||
|
let host_status = self.calculate_host_status(host, metric_store);
|
||||||
|
// Don't include offline hosts in status aggregation
|
||||||
|
if host_status != Status::Offline {
|
||||||
|
worst_status = Status::aggregate(&[worst_status, host_status]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use the worst status color as background
|
||||||
|
let background_color = Theme::status_color(worst_status);
|
||||||
|
|
||||||
|
// Split the title bar into left and right sections
|
||||||
|
let chunks = Layout::default()
|
||||||
|
.direction(Direction::Horizontal)
|
||||||
|
.constraints([Constraint::Length(22), Constraint::Min(0)])
|
||||||
|
.split(area);
|
||||||
|
|
||||||
|
// Left side: "cm-dashboard" text with version
|
||||||
|
let title_text = format!(" cm-dashboard v{}", env!("CARGO_PKG_VERSION"));
|
||||||
|
let left_span = Span::styled(
|
||||||
|
&title_text,
|
||||||
|
Style::default().fg(Theme::background()).bg(background_color).add_modifier(Modifier::BOLD)
|
||||||
|
);
|
||||||
|
let left_title = Paragraph::new(Line::from(vec![left_span]))
|
||||||
|
.style(Style::default().bg(background_color));
|
||||||
|
frame.render_widget(left_title, chunks[0]);
|
||||||
|
|
||||||
|
// Right side: hosts with status indicators
|
||||||
|
let mut host_spans = Vec::new();
|
||||||
|
|
||||||
for (i, host) in self.available_hosts.iter().enumerate() {
|
for (i, host) in self.available_hosts.iter().enumerate() {
|
||||||
if i > 0 {
|
if i > 0 {
|
||||||
spans.push(Span::styled(" ", Typography::title()));
|
host_spans.push(Span::styled(
|
||||||
|
" ",
|
||||||
|
Style::default().fg(Theme::background()).bg(background_color)
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if this host has a command status that affects the icon
|
// Always show normal status icon based on metrics (no command status at host level)
|
||||||
let (status_icon, status_color) = if let Some(host_widgets) = self.host_widgets.get(host) {
|
let host_status = self.calculate_host_status(host, metric_store);
|
||||||
match &host_widgets.command_status {
|
let status_icon = StatusIcons::get_icon(host_status);
|
||||||
Some(CommandStatus::InProgress { command_type: CommandType::SystemRebuild, .. }) => {
|
|
||||||
// Show blue circular arrow during rebuild
|
|
||||||
("↻", Theme::highlight())
|
|
||||||
}
|
|
||||||
Some(CommandStatus::Success { command_type: CommandType::SystemRebuild, .. }) => {
|
|
||||||
// Show green checkmark for successful rebuild
|
|
||||||
("✓", Theme::success())
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
// Normal status icon based on metrics
|
|
||||||
let host_status = self.calculate_host_status(host, metric_store);
|
|
||||||
(StatusIcons::get_icon(host_status), Theme::status_color(host_status))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// No host widgets yet, use normal status
|
|
||||||
let host_status = self.calculate_host_status(host, metric_store);
|
|
||||||
(StatusIcons::get_icon(host_status), Theme::status_color(host_status))
|
|
||||||
};
|
|
||||||
|
|
||||||
// Add status icon
|
// Add status icon with background color as foreground against status background
|
||||||
spans.push(Span::styled(
|
host_spans.push(Span::styled(
|
||||||
format!("{} ", status_icon),
|
format!("{} ", status_icon),
|
||||||
Style::default().fg(status_color),
|
Style::default().fg(Theme::background()).bg(background_color),
|
||||||
));
|
));
|
||||||
|
|
||||||
if Some(host) == self.current_host.as_ref() {
|
if Some(host) == self.current_host.as_ref() {
|
||||||
// Selected host in bold bright white
|
// Selected host with brackets in bold background color against status background
|
||||||
spans.push(Span::styled(
|
host_spans.push(Span::styled(
|
||||||
|
"[",
|
||||||
|
Style::default()
|
||||||
|
.fg(Theme::background())
|
||||||
|
.bg(background_color)
|
||||||
|
.add_modifier(Modifier::BOLD),
|
||||||
|
));
|
||||||
|
host_spans.push(Span::styled(
|
||||||
host.clone(),
|
host.clone(),
|
||||||
Typography::title().add_modifier(Modifier::BOLD),
|
Style::default()
|
||||||
|
.fg(Theme::background())
|
||||||
|
.bg(background_color)
|
||||||
|
.add_modifier(Modifier::BOLD),
|
||||||
|
));
|
||||||
|
host_spans.push(Span::styled(
|
||||||
|
"]",
|
||||||
|
Style::default()
|
||||||
|
.fg(Theme::background())
|
||||||
|
.bg(background_color)
|
||||||
|
.add_modifier(Modifier::BOLD),
|
||||||
));
|
));
|
||||||
} else {
|
} else {
|
||||||
// Other hosts in normal style with status color
|
// Other hosts in normal background color against status background
|
||||||
spans.push(Span::styled(
|
host_spans.push(Span::styled(
|
||||||
host.clone(),
|
host.clone(),
|
||||||
Style::default().fg(status_color),
|
Style::default().fg(Theme::background()).bg(background_color),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let title_line = Line::from(spans);
|
// Add right padding
|
||||||
let title = Paragraph::new(vec![title_line]);
|
host_spans.push(Span::styled(
|
||||||
|
" ",
|
||||||
|
Style::default().fg(Theme::background()).bg(background_color)
|
||||||
|
));
|
||||||
|
|
||||||
frame.render_widget(title, area);
|
let host_line = Line::from(host_spans);
|
||||||
|
let host_title = Paragraph::new(vec![host_line])
|
||||||
|
.style(Style::default().bg(background_color))
|
||||||
|
.alignment(ratatui::layout::Alignment::Right);
|
||||||
|
frame.render_widget(host_title, chunks[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate overall status for a host based on its metrics
|
/// Calculate overall status for a host based on its structured data
|
||||||
fn calculate_host_status(&self, hostname: &str, metric_store: &MetricStore) -> Status {
|
fn calculate_host_status(&self, hostname: &str, metric_store: &MetricStore) -> Status {
|
||||||
let metrics = metric_store.get_metrics_for_host(hostname);
|
// Check if we have structured data for this host
|
||||||
|
if let Some(_agent_data) = metric_store.get_agent_data(hostname) {
|
||||||
if metrics.is_empty() {
|
// Return OK since we have data
|
||||||
return Status::Unknown;
|
|
||||||
}
|
|
||||||
|
|
||||||
// First check if we have the aggregated host status summary from the agent
|
|
||||||
if let Some(host_summary_metric) = metric_store.get_metric(hostname, "host_status_summary") {
|
|
||||||
return host_summary_metric.status;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback to old aggregation logic with proper Pending handling
|
|
||||||
let mut has_critical = false;
|
|
||||||
let mut has_warning = false;
|
|
||||||
let mut has_pending = false;
|
|
||||||
let mut ok_count = 0;
|
|
||||||
|
|
||||||
for metric in &metrics {
|
|
||||||
match metric.status {
|
|
||||||
Status::Critical => has_critical = true,
|
|
||||||
Status::Warning => has_warning = true,
|
|
||||||
Status::Pending => has_pending = true,
|
|
||||||
Status::Ok => ok_count += 1,
|
|
||||||
Status::Unknown => {} // Ignore unknown for aggregation
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Priority order: Critical > Warning > Pending > Ok > Unknown
|
|
||||||
if has_critical {
|
|
||||||
Status::Critical
|
|
||||||
} else if has_warning {
|
|
||||||
Status::Warning
|
|
||||||
} else if has_pending {
|
|
||||||
Status::Pending
|
|
||||||
} else if ok_count > 0 {
|
|
||||||
Status::Ok
|
Status::Ok
|
||||||
} else {
|
} else {
|
||||||
Status::Unknown
|
Status::Offline
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Render dynamic statusbar with context-aware shortcuts
|
/// Render popup menu for service actions
|
||||||
fn render_statusbar(&self, frame: &mut Frame, area: Rect) {
|
fn render_popup_menu(&self, frame: &mut Frame, popup: &PopupMenu) {
|
||||||
let shortcuts = self.get_context_shortcuts();
|
use ratatui::widgets::{Block, Borders, Clear, List, ListItem};
|
||||||
let statusbar_text = shortcuts.join(" • ");
|
use ratatui::style::{Color, Modifier};
|
||||||
|
|
||||||
let statusbar = Paragraph::new(statusbar_text)
|
// Menu items
|
||||||
.style(Typography::secondary())
|
let items = vec![
|
||||||
.alignment(ratatui::layout::Alignment::Center);
|
"Start Service",
|
||||||
|
"Stop Service",
|
||||||
|
"View Logs",
|
||||||
|
];
|
||||||
|
|
||||||
|
// Calculate popup size
|
||||||
|
let width = 20;
|
||||||
|
let height = items.len() as u16 + 2; // +2 for borders
|
||||||
|
|
||||||
|
// Position popup near click location, but keep it on screen
|
||||||
|
let screen_width = frame.size().width;
|
||||||
|
let screen_height = frame.size().height;
|
||||||
|
|
||||||
|
let x = if popup.x + width < screen_width {
|
||||||
|
popup.x
|
||||||
|
} else {
|
||||||
|
screen_width.saturating_sub(width)
|
||||||
|
};
|
||||||
|
|
||||||
|
let y = if popup.y + height < screen_height {
|
||||||
|
popup.y
|
||||||
|
} else {
|
||||||
|
screen_height.saturating_sub(height)
|
||||||
|
};
|
||||||
|
|
||||||
|
let popup_area = Rect {
|
||||||
|
x,
|
||||||
|
y,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create menu items with selection highlight
|
||||||
|
let menu_items: Vec<ListItem> = items
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(i, item)| {
|
||||||
|
let style = if i == popup.selected_index {
|
||||||
|
Style::default()
|
||||||
|
.fg(Color::Black)
|
||||||
|
.bg(Color::White)
|
||||||
|
.add_modifier(Modifier::BOLD)
|
||||||
|
} else {
|
||||||
|
Style::default().fg(Theme::primary_text())
|
||||||
|
};
|
||||||
|
ListItem::new(*item).style(style)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let menu_list = List::new(menu_items)
|
||||||
|
.block(
|
||||||
|
Block::default()
|
||||||
|
.borders(Borders::ALL)
|
||||||
|
.style(Style::default().bg(Theme::background()).fg(Theme::primary_text()))
|
||||||
|
);
|
||||||
|
|
||||||
|
// Clear the area and render menu
|
||||||
|
frame.render_widget(Clear, popup_area);
|
||||||
|
frame.render_widget(menu_list, popup_area);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Render statusbar with host and client IPs
|
||||||
|
fn render_statusbar(&self, frame: &mut Frame, area: Rect, _metric_store: &MetricStore) {
|
||||||
|
use ratatui::text::{Line, Span};
|
||||||
|
use ratatui::widgets::Paragraph;
|
||||||
|
|
||||||
|
// Get current host info
|
||||||
|
let (hostname_str, host_ip, build_version, agent_version) = if let Some(hostname) = &self.current_host {
|
||||||
|
// Get the connection IP (the IP dashboard uses to connect to the agent)
|
||||||
|
let ip = if let Some(host_details) = self.config.hosts.get(hostname) {
|
||||||
|
host_details.get_connection_ip(hostname)
|
||||||
|
} else {
|
||||||
|
hostname.clone()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Get build and agent versions from system widget
|
||||||
|
let (build, agent) = if let Some(host_widgets) = self.host_widgets.get(hostname) {
|
||||||
|
let build = host_widgets.system_widget.get_build_version().unwrap_or("N/A".to_string());
|
||||||
|
let agent = host_widgets.system_widget.get_agent_version().unwrap_or("N/A".to_string());
|
||||||
|
(build, agent)
|
||||||
|
} else {
|
||||||
|
("N/A".to_string(), "N/A".to_string())
|
||||||
|
};
|
||||||
|
|
||||||
|
(hostname.clone(), ip, build, agent)
|
||||||
|
} else {
|
||||||
|
("None".to_string(), "N/A".to_string(), "N/A".to_string(), "N/A".to_string())
|
||||||
|
};
|
||||||
|
|
||||||
|
let left_text = format!("Host: {} | {} | Build:{} | Agent:{}", hostname_str, host_ip, build_version, agent_version);
|
||||||
|
|
||||||
|
// Get dashboard local IP
|
||||||
|
let dashboard_ip = Self::get_local_ip();
|
||||||
|
let right_text = format!("Dashboard: {}", dashboard_ip);
|
||||||
|
|
||||||
|
// Calculate spacing to push right text to the right (accounting for 1 char left padding)
|
||||||
|
let spacing = area.width as usize - left_text.len() - right_text.len() - 2; // -2 for left padding
|
||||||
|
let spacing_str = " ".repeat(spacing.max(1));
|
||||||
|
|
||||||
|
let line = Line::from(vec![
|
||||||
|
Span::raw(" "), // 1 char left padding
|
||||||
|
Span::styled(left_text, Style::default().fg(Theme::border())),
|
||||||
|
Span::raw(spacing_str),
|
||||||
|
Span::styled(right_text, Style::default().fg(Theme::border())),
|
||||||
|
]);
|
||||||
|
|
||||||
|
let statusbar = Paragraph::new(line);
|
||||||
frame.render_widget(statusbar, area);
|
frame.render_widget(statusbar, area);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get context-aware shortcuts based on focused panel
|
/// Get local IP address of the dashboard
|
||||||
fn get_context_shortcuts(&self) -> Vec<String> {
|
fn get_local_ip() -> String {
|
||||||
let mut shortcuts = Vec::new();
|
use std::net::UdpSocket;
|
||||||
|
|
||||||
// Global shortcuts
|
// Try to get local IP by creating a UDP socket
|
||||||
shortcuts.push("Tab: Switch Host".to_string());
|
// This doesn't actually send data, just determines routing
|
||||||
shortcuts.push("Shift+Tab: Switch Panel".to_string());
|
if let Ok(socket) = UdpSocket::bind("0.0.0.0:0") {
|
||||||
|
if socket.connect("8.8.8.8:80").is_ok() {
|
||||||
// Scroll shortcuts (always available)
|
if let Ok(addr) = socket.local_addr() {
|
||||||
shortcuts.push("↑↓: Scroll".to_string());
|
return addr.ip().to_string();
|
||||||
|
}
|
||||||
// Panel-specific shortcuts
|
|
||||||
match self.focused_panel {
|
|
||||||
PanelType::System => {
|
|
||||||
shortcuts.push("R: Rebuild".to_string());
|
|
||||||
}
|
|
||||||
PanelType::Services => {
|
|
||||||
shortcuts.push("S: Start".to_string());
|
|
||||||
shortcuts.push("Shift+S: Stop".to_string());
|
|
||||||
shortcuts.push("R: Restart".to_string());
|
|
||||||
}
|
|
||||||
PanelType::Backup => {
|
|
||||||
shortcuts.push("B: Trigger Backup".to_string());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
"N/A".to_string()
|
||||||
// Always show quit
|
|
||||||
shortcuts.push("Q: Quit".to_string());
|
|
||||||
|
|
||||||
shortcuts
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn render_system_panel(&mut self, frame: &mut Frame, area: Rect, _metric_store: &MetricStore) {
|
fn render_system_panel(&mut self, frame: &mut Frame, area: Rect, _metric_store: &MetricStore) {
|
||||||
let system_block = if self.focused_panel == PanelType::System {
|
let system_block = Components::widget_block("system");
|
||||||
Components::focused_widget_block("system")
|
|
||||||
} else {
|
|
||||||
Components::widget_block("system")
|
|
||||||
};
|
|
||||||
let inner_area = system_block.inner(area);
|
let inner_area = system_block.inner(area);
|
||||||
frame.render_widget(system_block, area);
|
frame.render_widget(system_block, area);
|
||||||
// Get current host widgets, create if none exist
|
// Get current host widgets, create if none exist
|
||||||
if let Some(hostname) = self.current_host.clone() {
|
if let Some(hostname) = self.current_host.clone() {
|
||||||
let scroll_offset = {
|
// Clone the config to avoid borrowing issues
|
||||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
let config = self.config.clone();
|
||||||
host_widgets.system_scroll_offset
|
|
||||||
};
|
|
||||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
||||||
host_widgets.system_widget.render_with_scroll(frame, inner_area, scroll_offset);
|
host_widgets.system_widget.render(frame, inner_area, &hostname, Some(&config));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn render_backup_panel(&mut self, frame: &mut Frame, area: Rect) {
|
|
||||||
let backup_block = if self.focused_panel == PanelType::Backup {
|
/// Render offline host message with wake-up option
|
||||||
Components::focused_widget_block("backup")
|
fn render_offline_host_message(&self, frame: &mut Frame, area: Rect) {
|
||||||
|
use ratatui::layout::Alignment;
|
||||||
|
use ratatui::style::Modifier;
|
||||||
|
use ratatui::text::{Line, Span};
|
||||||
|
use ratatui::widgets::{Block, Borders, Paragraph};
|
||||||
|
|
||||||
|
// Get hostname for message
|
||||||
|
let hostname = self.current_host.as_ref()
|
||||||
|
.map(|h| h.as_str())
|
||||||
|
.unwrap_or("Unknown");
|
||||||
|
|
||||||
|
// Check if host has MAC address for wake-on-LAN
|
||||||
|
let has_mac = self.current_host.as_ref()
|
||||||
|
.and_then(|hostname| self.config.hosts.get(hostname))
|
||||||
|
.and_then(|details| details.mac_address.as_ref())
|
||||||
|
.is_some();
|
||||||
|
|
||||||
|
// Create message content
|
||||||
|
let mut lines = vec![
|
||||||
|
Line::from(Span::styled(
|
||||||
|
format!("Host '{}' is offline", hostname),
|
||||||
|
Style::default().fg(Theme::muted_text()).add_modifier(Modifier::BOLD),
|
||||||
|
)),
|
||||||
|
Line::from(""),
|
||||||
|
];
|
||||||
|
|
||||||
|
if has_mac {
|
||||||
|
lines.push(Line::from(Span::styled(
|
||||||
|
"Press 'w' to wake up host",
|
||||||
|
Style::default().fg(Theme::primary_text()).add_modifier(Modifier::BOLD),
|
||||||
|
)));
|
||||||
} else {
|
} else {
|
||||||
Components::widget_block("backup")
|
lines.push(Line::from(Span::styled(
|
||||||
};
|
"No MAC address configured - cannot wake up",
|
||||||
let inner_area = backup_block.inner(area);
|
Style::default().fg(Theme::muted_text()),
|
||||||
frame.render_widget(backup_block, area);
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
// Get current host widgets for backup widget
|
// Create centered message
|
||||||
if let Some(hostname) = self.current_host.clone() {
|
let message = Paragraph::new(lines)
|
||||||
let scroll_offset = {
|
.block(Block::default()
|
||||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
.borders(Borders::ALL)
|
||||||
host_widgets.backup_scroll_offset
|
.border_style(Style::default().fg(Theme::muted_text()))
|
||||||
};
|
.title(" Offline Host ")
|
||||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
.title_style(Style::default().fg(Theme::muted_text()).add_modifier(Modifier::BOLD)))
|
||||||
host_widgets.backup_widget.render_with_scroll(frame, inner_area, scroll_offset);
|
.style(Style::default().bg(Theme::background()).fg(Theme::primary_text()))
|
||||||
|
.alignment(Alignment::Center);
|
||||||
|
|
||||||
|
// Center the message in the available area
|
||||||
|
let popup_area = ratatui::layout::Layout::default()
|
||||||
|
.direction(Direction::Vertical)
|
||||||
|
.constraints([
|
||||||
|
Constraint::Percentage(40),
|
||||||
|
Constraint::Length(6),
|
||||||
|
Constraint::Percentage(40),
|
||||||
|
])
|
||||||
|
.split(area)[1];
|
||||||
|
|
||||||
|
let popup_area = ratatui::layout::Layout::default()
|
||||||
|
.direction(Direction::Horizontal)
|
||||||
|
.constraints([
|
||||||
|
Constraint::Percentage(25),
|
||||||
|
Constraint::Percentage(50),
|
||||||
|
Constraint::Percentage(25),
|
||||||
|
])
|
||||||
|
.split(popup_area)[1];
|
||||||
|
|
||||||
|
frame.render_widget(message, popup_area);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse MAC address string (e.g., "AA:BB:CC:DD:EE:FF") to [u8; 6]
|
||||||
|
/// Get the connection IP for a hostname based on host configuration
|
||||||
|
fn get_connection_ip(&self, hostname: &str) -> String {
|
||||||
|
if let Some(host_details) = self.config.hosts.get(hostname) {
|
||||||
|
host_details.get_connection_ip(hostname)
|
||||||
|
} else {
|
||||||
|
hostname.to_string()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_mac_address(mac_str: &str) -> Result<[u8; 6], &'static str> {
|
||||||
|
let parts: Vec<&str> = mac_str.split(':').collect();
|
||||||
|
if parts.len() != 6 {
|
||||||
|
return Err("MAC address must have 6 parts separated by colons");
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut mac = [0u8; 6];
|
||||||
|
for (i, part) in parts.iter().enumerate() {
|
||||||
|
match u8::from_str_radix(part, 16) {
|
||||||
|
Ok(byte) => mac[i] = byte,
|
||||||
|
Err(_) => return Err("Invalid hexadecimal byte in MAC address"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(mac)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -142,11 +142,14 @@ impl Theme {
|
|||||||
/// Get color for status level
|
/// Get color for status level
|
||||||
pub fn status_color(status: Status) -> Color {
|
pub fn status_color(status: Status) -> Color {
|
||||||
match status {
|
match status {
|
||||||
|
Status::Info => Self::muted_text(), // Gray for informational data
|
||||||
Status::Ok => Self::success(),
|
Status::Ok => Self::success(),
|
||||||
|
Status::Inactive => Self::muted_text(), // Gray for inactive services in service list
|
||||||
Status::Pending => Self::highlight(), // Blue for pending
|
Status::Pending => Self::highlight(), // Blue for pending
|
||||||
Status::Warning => Self::warning(),
|
Status::Warning => Self::warning(),
|
||||||
Status::Critical => Self::error(),
|
Status::Critical => Self::error(),
|
||||||
Status::Unknown => Self::muted_text(),
|
Status::Unknown => Self::muted_text(),
|
||||||
|
Status::Offline => Self::muted_text(), // Dark gray for offline
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -223,9 +226,6 @@ impl Layout {
|
|||||||
pub const LEFT_PANEL_WIDTH: u16 = 45;
|
pub const LEFT_PANEL_WIDTH: u16 = 45;
|
||||||
/// Right panel percentage (services)
|
/// Right panel percentage (services)
|
||||||
pub const RIGHT_PANEL_WIDTH: u16 = 55;
|
pub const RIGHT_PANEL_WIDTH: u16 = 55;
|
||||||
/// System vs backup split (equal)
|
|
||||||
pub const SYSTEM_PANEL_HEIGHT: u16 = 50;
|
|
||||||
pub const BACKUP_PANEL_HEIGHT: u16 = 50;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Typography system
|
/// Typography system
|
||||||
@ -241,11 +241,14 @@ impl StatusIcons {
|
|||||||
/// Get status icon symbol
|
/// Get status icon symbol
|
||||||
pub fn get_icon(status: Status) -> &'static str {
|
pub fn get_icon(status: Status) -> &'static str {
|
||||||
match status {
|
match status {
|
||||||
|
Status::Info => "", // No icon for informational data
|
||||||
Status::Ok => "●",
|
Status::Ok => "●",
|
||||||
|
Status::Inactive => "○", // Empty circle for inactive services
|
||||||
Status::Pending => "◉", // Hollow circle for pending
|
Status::Pending => "◉", // Hollow circle for pending
|
||||||
Status::Warning => "◐",
|
Status::Warning => "◐",
|
||||||
Status::Critical => "◯",
|
Status::Critical => "!",
|
||||||
Status::Unknown => "?",
|
Status::Unknown => "?",
|
||||||
|
Status::Offline => "○", // Empty circle for offline
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -253,11 +256,14 @@ impl StatusIcons {
|
|||||||
pub fn create_status_spans(status: Status, text: &str) -> Vec<ratatui::text::Span<'static>> {
|
pub fn create_status_spans(status: Status, text: &str) -> Vec<ratatui::text::Span<'static>> {
|
||||||
let icon = Self::get_icon(status);
|
let icon = Self::get_icon(status);
|
||||||
let status_color = match status {
|
let status_color = match status {
|
||||||
|
Status::Info => Theme::muted_text(), // Gray for info
|
||||||
Status::Ok => Theme::success(), // Green
|
Status::Ok => Theme::success(), // Green
|
||||||
|
Status::Inactive => Theme::muted_text(), // Gray for inactive services
|
||||||
Status::Pending => Theme::highlight(), // Blue
|
Status::Pending => Theme::highlight(), // Blue
|
||||||
Status::Warning => Theme::warning(), // Yellow
|
Status::Warning => Theme::warning(), // Yellow
|
||||||
Status::Critical => Theme::error(), // Red
|
Status::Critical => Theme::error(), // Red
|
||||||
Status::Unknown => Theme::muted_text(), // Gray
|
Status::Unknown => Theme::muted_text(), // Gray
|
||||||
|
Status::Offline => Theme::muted_text(), // Dark gray for offline
|
||||||
};
|
};
|
||||||
|
|
||||||
vec![
|
vec![
|
||||||
@ -289,27 +295,9 @@ impl Components {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Widget block with focus indicator (blue border)
|
|
||||||
pub fn focused_widget_block(title: &str) -> Block<'_> {
|
|
||||||
Block::default()
|
|
||||||
.title(title)
|
|
||||||
.borders(Borders::ALL)
|
|
||||||
.style(Style::default().fg(Theme::highlight()).bg(Theme::background())) // Blue border for focus
|
|
||||||
.title_style(
|
|
||||||
Style::default()
|
|
||||||
.fg(Theme::highlight()) // Blue title for focus
|
|
||||||
.bg(Theme::background()),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Typography {
|
impl Typography {
|
||||||
/// Main title style (dashboard header)
|
|
||||||
pub fn title() -> Style {
|
|
||||||
Style::default()
|
|
||||||
.fg(Theme::primary_text())
|
|
||||||
.bg(Theme::background())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Widget title style (panel headers) - bold bright white
|
/// Widget title style (panel headers) - bold bright white
|
||||||
pub fn widget_title() -> Style {
|
pub fn widget_title() -> Style {
|
||||||
|
|||||||
@ -1,436 +0,0 @@
|
|||||||
use cm_dashboard_shared::{Metric, Status};
|
|
||||||
use ratatui::{
|
|
||||||
layout::Rect,
|
|
||||||
widgets::Paragraph,
|
|
||||||
Frame,
|
|
||||||
};
|
|
||||||
use tracing::debug;
|
|
||||||
|
|
||||||
use super::Widget;
|
|
||||||
use crate::ui::theme::{StatusIcons, Typography};
|
|
||||||
|
|
||||||
/// Backup widget displaying backup status, services, and repository information
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct BackupWidget {
|
|
||||||
/// Overall backup status
|
|
||||||
overall_status: Status,
|
|
||||||
/// Last backup duration in seconds
|
|
||||||
duration_seconds: Option<i64>,
|
|
||||||
/// Last backup timestamp
|
|
||||||
last_run_timestamp: Option<i64>,
|
|
||||||
/// Total number of backup services
|
|
||||||
total_services: Option<i64>,
|
|
||||||
/// Total repository size in GB
|
|
||||||
total_repo_size_gb: Option<f32>,
|
|
||||||
/// Total disk space for backups in GB
|
|
||||||
backup_disk_total_gb: Option<f32>,
|
|
||||||
/// Used disk space for backups in GB
|
|
||||||
backup_disk_used_gb: Option<f32>,
|
|
||||||
/// Backup disk product name from SMART data
|
|
||||||
backup_disk_product_name: Option<String>,
|
|
||||||
/// Backup disk serial number from SMART data
|
|
||||||
backup_disk_serial_number: Option<String>,
|
|
||||||
/// Backup disk filesystem label
|
|
||||||
backup_disk_filesystem_label: Option<String>,
|
|
||||||
/// Number of completed services
|
|
||||||
services_completed_count: Option<i64>,
|
|
||||||
/// Number of failed services
|
|
||||||
services_failed_count: Option<i64>,
|
|
||||||
/// Number of disabled services
|
|
||||||
services_disabled_count: Option<i64>,
|
|
||||||
/// All individual service metrics for detailed display
|
|
||||||
service_metrics: Vec<ServiceMetricData>,
|
|
||||||
/// Last update indicator
|
|
||||||
has_data: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
struct ServiceMetricData {
|
|
||||||
name: String,
|
|
||||||
status: Status,
|
|
||||||
exit_code: Option<i64>,
|
|
||||||
archive_count: Option<i64>,
|
|
||||||
repo_size_gb: Option<f32>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BackupWidget {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Self {
|
|
||||||
overall_status: Status::Unknown,
|
|
||||||
duration_seconds: None,
|
|
||||||
last_run_timestamp: None,
|
|
||||||
total_services: None,
|
|
||||||
total_repo_size_gb: None,
|
|
||||||
backup_disk_total_gb: None,
|
|
||||||
backup_disk_used_gb: None,
|
|
||||||
backup_disk_product_name: None,
|
|
||||||
backup_disk_serial_number: None,
|
|
||||||
backup_disk_filesystem_label: None,
|
|
||||||
services_completed_count: None,
|
|
||||||
services_failed_count: None,
|
|
||||||
services_disabled_count: None,
|
|
||||||
service_metrics: Vec::new(),
|
|
||||||
has_data: false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check if the backup widget has any data to display
|
|
||||||
pub fn has_data(&self) -> bool {
|
|
||||||
self.has_data
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// Format size with proper units (xxxkB/MB/GB/TB)
|
|
||||||
fn format_size_with_proper_units(size_gb: f32) -> String {
|
|
||||||
if size_gb >= 1000.0 {
|
|
||||||
// TB range
|
|
||||||
format!("{:.1}TB", size_gb / 1000.0)
|
|
||||||
} else if size_gb >= 1.0 {
|
|
||||||
// GB range
|
|
||||||
format!("{:.1}GB", size_gb)
|
|
||||||
} else if size_gb >= 0.001 {
|
|
||||||
// MB range (size_gb * 1024 = MB)
|
|
||||||
let size_mb = size_gb * 1024.0;
|
|
||||||
format!("{:.1}MB", size_mb)
|
|
||||||
} else if size_gb >= 0.000001 {
|
|
||||||
// kB range (size_gb * 1024 * 1024 = kB)
|
|
||||||
let size_kb = size_gb * 1024.0 * 1024.0;
|
|
||||||
format!("{:.0}kB", size_kb)
|
|
||||||
} else {
|
|
||||||
// B range (size_gb * 1024^3 = bytes)
|
|
||||||
let size_bytes = size_gb * 1024.0 * 1024.0 * 1024.0;
|
|
||||||
format!("{:.0}B", size_bytes)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// Extract service name from metric name (e.g., "backup_service_gitea_status" -> "gitea")
|
|
||||||
fn extract_service_name(metric_name: &str) -> Option<String> {
|
|
||||||
if metric_name.starts_with("backup_service_") {
|
|
||||||
let name_part = &metric_name[15..]; // Remove "backup_service_" prefix
|
|
||||||
|
|
||||||
// Try to extract service name by removing known suffixes
|
|
||||||
if let Some(service_name) = name_part.strip_suffix("_status") {
|
|
||||||
Some(service_name.to_string())
|
|
||||||
} else if let Some(service_name) = name_part.strip_suffix("_exit_code") {
|
|
||||||
Some(service_name.to_string())
|
|
||||||
} else if let Some(service_name) = name_part.strip_suffix("_archive_count") {
|
|
||||||
Some(service_name.to_string())
|
|
||||||
} else if let Some(service_name) = name_part.strip_suffix("_repo_size_gb") {
|
|
||||||
Some(service_name.to_string())
|
|
||||||
} else if let Some(service_name) = name_part.strip_suffix("_repo_path") {
|
|
||||||
Some(service_name.to_string())
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Widget for BackupWidget {
|
|
||||||
fn update_from_metrics(&mut self, metrics: &[&Metric]) {
|
|
||||||
debug!("Backup widget updating with {} metrics", metrics.len());
|
|
||||||
for metric in metrics {
|
|
||||||
debug!(
|
|
||||||
"Backup metric: {} = {:?} (status: {:?})",
|
|
||||||
metric.name, metric.value, metric.status
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Also debug the service_data after processing
|
|
||||||
debug!("Processing individual service metrics...");
|
|
||||||
|
|
||||||
// Log how many metrics are backup service metrics
|
|
||||||
let service_metric_count = metrics
|
|
||||||
.iter()
|
|
||||||
.filter(|m| m.name.starts_with("backup_service_"))
|
|
||||||
.count();
|
|
||||||
debug!(
|
|
||||||
"Found {} backup_service_ metrics out of {} total backup metrics",
|
|
||||||
service_metric_count,
|
|
||||||
metrics.len()
|
|
||||||
);
|
|
||||||
|
|
||||||
// Reset service metrics
|
|
||||||
self.service_metrics.clear();
|
|
||||||
let mut service_data: std::collections::HashMap<String, ServiceMetricData> =
|
|
||||||
std::collections::HashMap::new();
|
|
||||||
|
|
||||||
for metric in metrics {
|
|
||||||
match metric.name.as_str() {
|
|
||||||
"backup_overall_status" => {
|
|
||||||
let status_str = metric.value.as_string();
|
|
||||||
self.overall_status = match status_str.as_str() {
|
|
||||||
"ok" => Status::Ok,
|
|
||||||
"warning" => Status::Warning,
|
|
||||||
"critical" => Status::Critical,
|
|
||||||
_ => Status::Unknown,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
"backup_duration_seconds" => {
|
|
||||||
self.duration_seconds = metric.value.as_i64();
|
|
||||||
}
|
|
||||||
"backup_last_run_timestamp" => {
|
|
||||||
self.last_run_timestamp = metric.value.as_i64();
|
|
||||||
}
|
|
||||||
"backup_total_services" => {
|
|
||||||
self.total_services = metric.value.as_i64();
|
|
||||||
}
|
|
||||||
"backup_total_repo_size_gb" => {
|
|
||||||
self.total_repo_size_gb = metric.value.as_f32();
|
|
||||||
}
|
|
||||||
"backup_disk_total_gb" => {
|
|
||||||
self.backup_disk_total_gb = metric.value.as_f32();
|
|
||||||
}
|
|
||||||
"backup_disk_used_gb" => {
|
|
||||||
self.backup_disk_used_gb = metric.value.as_f32();
|
|
||||||
}
|
|
||||||
"backup_disk_product_name" => {
|
|
||||||
self.backup_disk_product_name = Some(metric.value.as_string());
|
|
||||||
}
|
|
||||||
"backup_disk_serial_number" => {
|
|
||||||
self.backup_disk_serial_number = Some(metric.value.as_string());
|
|
||||||
}
|
|
||||||
"backup_disk_filesystem_label" => {
|
|
||||||
self.backup_disk_filesystem_label = Some(metric.value.as_string());
|
|
||||||
}
|
|
||||||
"backup_services_completed_count" => {
|
|
||||||
self.services_completed_count = metric.value.as_i64();
|
|
||||||
}
|
|
||||||
"backup_services_failed_count" => {
|
|
||||||
self.services_failed_count = metric.value.as_i64();
|
|
||||||
}
|
|
||||||
"backup_services_disabled_count" => {
|
|
||||||
self.services_disabled_count = metric.value.as_i64();
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
// Handle individual service metrics
|
|
||||||
if let Some(service_name) = Self::extract_service_name(&metric.name) {
|
|
||||||
debug!(
|
|
||||||
"Extracted service name '{}' from metric '{}'",
|
|
||||||
service_name, metric.name
|
|
||||||
);
|
|
||||||
let entry = service_data.entry(service_name.clone()).or_insert_with(|| {
|
|
||||||
ServiceMetricData {
|
|
||||||
name: service_name,
|
|
||||||
status: Status::Unknown,
|
|
||||||
exit_code: None,
|
|
||||||
archive_count: None,
|
|
||||||
repo_size_gb: None,
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
if metric.name.ends_with("_status") {
|
|
||||||
entry.status = metric.status;
|
|
||||||
debug!("Set status for {}: {:?}", entry.name, entry.status);
|
|
||||||
} else if metric.name.ends_with("_exit_code") {
|
|
||||||
entry.exit_code = metric.value.as_i64();
|
|
||||||
} else if metric.name.ends_with("_archive_count") {
|
|
||||||
entry.archive_count = metric.value.as_i64();
|
|
||||||
debug!(
|
|
||||||
"Set archive_count for {}: {:?}",
|
|
||||||
entry.name, entry.archive_count
|
|
||||||
);
|
|
||||||
} else if metric.name.ends_with("_repo_size_gb") {
|
|
||||||
entry.repo_size_gb = metric.value.as_f32();
|
|
||||||
debug!(
|
|
||||||
"Set repo_size_gb for {}: {:?}",
|
|
||||||
entry.name, entry.repo_size_gb
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
debug!(
|
|
||||||
"Could not extract service name from metric: {}",
|
|
||||||
metric.name
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert service data to sorted vector
|
|
||||||
let mut services: Vec<ServiceMetricData> = service_data.into_values().collect();
|
|
||||||
services.sort_by(|a, b| a.name.cmp(&b.name));
|
|
||||||
self.service_metrics = services;
|
|
||||||
|
|
||||||
self.has_data = !metrics.is_empty();
|
|
||||||
|
|
||||||
debug!(
|
|
||||||
"Backup widget updated: status={:?}, services={}, total_size={:?}GB",
|
|
||||||
self.overall_status,
|
|
||||||
self.service_metrics.len(),
|
|
||||||
self.total_repo_size_gb
|
|
||||||
);
|
|
||||||
|
|
||||||
// Debug individual service data
|
|
||||||
for service in &self.service_metrics {
|
|
||||||
debug!(
|
|
||||||
"Service {}: status={:?}, archives={:?}, size={:?}GB",
|
|
||||||
service.name, service.status, service.archive_count, service.repo_size_gb
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BackupWidget {
|
|
||||||
/// Render with scroll offset support
|
|
||||||
pub fn render_with_scroll(&mut self, frame: &mut Frame, area: Rect, scroll_offset: usize) {
|
|
||||||
let mut lines = Vec::new();
|
|
||||||
|
|
||||||
// Latest backup section
|
|
||||||
lines.push(ratatui::text::Line::from(vec![
|
|
||||||
ratatui::text::Span::styled("Latest backup:", Typography::widget_title())
|
|
||||||
]));
|
|
||||||
|
|
||||||
// Timestamp with status icon
|
|
||||||
let timestamp_text = if let Some(timestamp) = self.last_run_timestamp {
|
|
||||||
self.format_timestamp(timestamp)
|
|
||||||
} else {
|
|
||||||
"Unknown".to_string()
|
|
||||||
};
|
|
||||||
let timestamp_spans = StatusIcons::create_status_spans(
|
|
||||||
self.overall_status,
|
|
||||||
×tamp_text
|
|
||||||
);
|
|
||||||
lines.push(ratatui::text::Line::from(timestamp_spans));
|
|
||||||
|
|
||||||
// Duration as sub-item
|
|
||||||
if let Some(duration) = self.duration_seconds {
|
|
||||||
let duration_text = self.format_duration(duration);
|
|
||||||
lines.push(ratatui::text::Line::from(vec![
|
|
||||||
ratatui::text::Span::styled(" └─ ", Typography::tree()),
|
|
||||||
ratatui::text::Span::styled(format!("Duration: {}", duration_text), Typography::secondary())
|
|
||||||
]));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Disk section
|
|
||||||
lines.push(ratatui::text::Line::from(vec![
|
|
||||||
ratatui::text::Span::styled("Disk:", Typography::widget_title())
|
|
||||||
]));
|
|
||||||
|
|
||||||
// Disk product name with status
|
|
||||||
if let Some(product) = &self.backup_disk_product_name {
|
|
||||||
let disk_spans = StatusIcons::create_status_spans(
|
|
||||||
Status::Ok, // Assuming disk is OK if we have data
|
|
||||||
product
|
|
||||||
);
|
|
||||||
lines.push(ratatui::text::Line::from(disk_spans));
|
|
||||||
|
|
||||||
// Serial number as sub-item
|
|
||||||
if let Some(serial) = &self.backup_disk_serial_number {
|
|
||||||
lines.push(ratatui::text::Line::from(vec![
|
|
||||||
ratatui::text::Span::styled(" ├─ ", Typography::tree()),
|
|
||||||
ratatui::text::Span::styled(format!("S/N: {}", serial), Typography::secondary())
|
|
||||||
]));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Usage as sub-item
|
|
||||||
if let (Some(used), Some(total)) = (self.backup_disk_used_gb, self.backup_disk_total_gb) {
|
|
||||||
let used_str = Self::format_size_with_proper_units(used);
|
|
||||||
let total_str = Self::format_size_with_proper_units(total);
|
|
||||||
lines.push(ratatui::text::Line::from(vec![
|
|
||||||
ratatui::text::Span::styled(" └─ ", Typography::tree()),
|
|
||||||
ratatui::text::Span::styled(format!("Usage: {}/{}", used_str, total_str), Typography::secondary())
|
|
||||||
]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Repos section
|
|
||||||
lines.push(ratatui::text::Line::from(vec![
|
|
||||||
ratatui::text::Span::styled("Repos:", Typography::widget_title())
|
|
||||||
]));
|
|
||||||
|
|
||||||
// Add all repository lines (no truncation here - scroll will handle display)
|
|
||||||
for service in &self.service_metrics {
|
|
||||||
if let (Some(archives), Some(size_gb)) = (service.archive_count, service.repo_size_gb) {
|
|
||||||
let size_str = Self::format_size_with_proper_units(size_gb);
|
|
||||||
let repo_text = format!("{} ({}) {}", service.name, archives, size_str);
|
|
||||||
let repo_spans = StatusIcons::create_status_spans(service.status, &repo_text);
|
|
||||||
lines.push(ratatui::text::Line::from(repo_spans));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply scroll offset
|
|
||||||
let total_lines = lines.len();
|
|
||||||
let available_height = area.height as usize;
|
|
||||||
|
|
||||||
// Calculate scroll boundaries
|
|
||||||
let max_scroll = if total_lines > available_height {
|
|
||||||
total_lines - available_height
|
|
||||||
} else {
|
|
||||||
total_lines.saturating_sub(1)
|
|
||||||
};
|
|
||||||
let effective_scroll = scroll_offset.min(max_scroll);
|
|
||||||
|
|
||||||
// Apply scrolling if needed
|
|
||||||
if scroll_offset > 0 || total_lines > available_height {
|
|
||||||
let mut visible_lines: Vec<_> = lines
|
|
||||||
.into_iter()
|
|
||||||
.skip(effective_scroll)
|
|
||||||
.take(available_height)
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// Add scroll indicator if there are hidden lines
|
|
||||||
if total_lines > available_height {
|
|
||||||
let hidden_above = effective_scroll;
|
|
||||||
let hidden_below = total_lines.saturating_sub(effective_scroll + available_height);
|
|
||||||
|
|
||||||
if (hidden_above > 0 || hidden_below > 0) && !visible_lines.is_empty() {
|
|
||||||
let scroll_text = if hidden_above > 0 && hidden_below > 0 {
|
|
||||||
format!("... {} above, {} below", hidden_above, hidden_below)
|
|
||||||
} else if hidden_above > 0 {
|
|
||||||
format!("... {} more above", hidden_above)
|
|
||||||
} else {
|
|
||||||
format!("... {} more below", hidden_below)
|
|
||||||
};
|
|
||||||
|
|
||||||
// Replace last line with scroll indicator
|
|
||||||
visible_lines.pop();
|
|
||||||
visible_lines.push(ratatui::text::Line::from(vec![
|
|
||||||
ratatui::text::Span::styled(scroll_text, Typography::muted())
|
|
||||||
]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let paragraph = Paragraph::new(ratatui::text::Text::from(visible_lines));
|
|
||||||
frame.render_widget(paragraph, area);
|
|
||||||
} else {
|
|
||||||
let paragraph = Paragraph::new(ratatui::text::Text::from(lines));
|
|
||||||
frame.render_widget(paragraph, area);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BackupWidget {
|
|
||||||
/// Format timestamp for display
|
|
||||||
fn format_timestamp(&self, timestamp: i64) -> String {
|
|
||||||
let datetime = chrono::DateTime::from_timestamp(timestamp, 0)
|
|
||||||
.unwrap_or_else(|| chrono::Utc::now());
|
|
||||||
datetime.format("%Y-%m-%d %H:%M:%S").to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Format duration in seconds to human readable format
|
|
||||||
fn format_duration(&self, duration_seconds: i64) -> String {
|
|
||||||
let minutes = duration_seconds / 60;
|
|
||||||
let seconds = duration_seconds % 60;
|
|
||||||
|
|
||||||
if minutes > 0 {
|
|
||||||
format!("{}.{}m", minutes, seconds / 6) // Show 1 decimal for minutes
|
|
||||||
} else {
|
|
||||||
format!("{}s", seconds)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for BackupWidget {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self::new()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -1 +0,0 @@
|
|||||||
// This file is intentionally left minimal - CPU functionality is handled by the SystemWidget
|
|
||||||
@ -1 +0,0 @@
|
|||||||
// This file is intentionally left minimal - Memory functionality is handled by the SystemWidget
|
|
||||||
@ -1,18 +1,13 @@
|
|||||||
use cm_dashboard_shared::Metric;
|
use cm_dashboard_shared::AgentData;
|
||||||
|
|
||||||
pub mod backup;
|
|
||||||
pub mod cpu;
|
|
||||||
pub mod memory;
|
|
||||||
pub mod services;
|
pub mod services;
|
||||||
pub mod system;
|
pub mod system;
|
||||||
|
|
||||||
pub use backup::BackupWidget;
|
|
||||||
pub use services::ServicesWidget;
|
pub use services::ServicesWidget;
|
||||||
pub use system::SystemWidget;
|
pub use system::SystemWidget;
|
||||||
|
|
||||||
/// Widget trait for UI components that display metrics
|
/// Widget trait for UI components that display structured data
|
||||||
pub trait Widget {
|
pub trait Widget {
|
||||||
/// Update widget with new metrics data
|
/// Update widget with structured agent data
|
||||||
fn update_from_metrics(&mut self, metrics: &[&Metric]);
|
fn update_from_agent_data(&mut self, agent_data: &AgentData);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
use cm_dashboard_shared::{Metric, Status};
|
use cm_dashboard_shared::{Metric, Status};
|
||||||
|
use super::Widget;
|
||||||
use ratatui::{
|
use ratatui::{
|
||||||
layout::{Constraint, Direction, Layout, Rect},
|
layout::{Constraint, Direction, Layout, Rect},
|
||||||
widgets::Paragraph,
|
widgets::Paragraph,
|
||||||
@ -7,11 +8,77 @@ use ratatui::{
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use super::Widget;
|
|
||||||
use crate::ui::theme::{Components, StatusIcons, Theme, Typography};
|
use crate::ui::theme::{Components, StatusIcons, Theme, Typography};
|
||||||
use crate::ui::{CommandStatus, CommandType};
|
|
||||||
use ratatui::style::Style;
|
use ratatui::style::Style;
|
||||||
|
|
||||||
|
/// Column visibility configuration based on terminal width
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
struct ColumnVisibility {
|
||||||
|
show_name: bool,
|
||||||
|
show_status: bool,
|
||||||
|
show_ram: bool,
|
||||||
|
show_uptime: bool,
|
||||||
|
show_restarts: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ColumnVisibility {
|
||||||
|
/// Calculate actual width needed for all columns
|
||||||
|
const NAME_WIDTH: u16 = 23;
|
||||||
|
const STATUS_WIDTH: u16 = 10;
|
||||||
|
const RAM_WIDTH: u16 = 8;
|
||||||
|
const UPTIME_WIDTH: u16 = 8;
|
||||||
|
const RESTARTS_WIDTH: u16 = 5;
|
||||||
|
const COLUMN_SPACING: u16 = 1; // Space between columns
|
||||||
|
|
||||||
|
/// Determine which columns to show based on available width
|
||||||
|
/// Priority order: Name > Status > RAM > Uptime > Restarts
|
||||||
|
fn from_width(width: u16) -> Self {
|
||||||
|
// Calculate cumulative widths for each configuration
|
||||||
|
let minimal = Self::NAME_WIDTH + Self::COLUMN_SPACING + Self::STATUS_WIDTH; // 34
|
||||||
|
let with_ram = minimal + Self::COLUMN_SPACING + Self::RAM_WIDTH; // 43
|
||||||
|
let with_uptime = with_ram + Self::COLUMN_SPACING + Self::UPTIME_WIDTH; // 52
|
||||||
|
let full = with_uptime + Self::COLUMN_SPACING + Self::RESTARTS_WIDTH; // 58
|
||||||
|
|
||||||
|
if width >= full {
|
||||||
|
// Show all columns
|
||||||
|
Self {
|
||||||
|
show_name: true,
|
||||||
|
show_status: true,
|
||||||
|
show_ram: true,
|
||||||
|
show_uptime: true,
|
||||||
|
show_restarts: true,
|
||||||
|
}
|
||||||
|
} else if width >= with_uptime {
|
||||||
|
// Hide restarts
|
||||||
|
Self {
|
||||||
|
show_name: true,
|
||||||
|
show_status: true,
|
||||||
|
show_ram: true,
|
||||||
|
show_uptime: true,
|
||||||
|
show_restarts: false,
|
||||||
|
}
|
||||||
|
} else if width >= with_ram {
|
||||||
|
// Hide uptime and restarts
|
||||||
|
Self {
|
||||||
|
show_name: true,
|
||||||
|
show_status: true,
|
||||||
|
show_ram: true,
|
||||||
|
show_uptime: false,
|
||||||
|
show_restarts: false,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Minimal: Name + Status only
|
||||||
|
Self {
|
||||||
|
show_name: true,
|
||||||
|
show_status: true,
|
||||||
|
show_ram: false,
|
||||||
|
show_uptime: false,
|
||||||
|
show_restarts: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Services widget displaying hierarchical systemd service statuses
|
/// Services widget displaying hierarchical systemd service statuses
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct ServicesWidget {
|
pub struct ServicesWidget {
|
||||||
@ -24,16 +91,20 @@ pub struct ServicesWidget {
|
|||||||
/// Last update indicator
|
/// Last update indicator
|
||||||
has_data: bool,
|
has_data: bool,
|
||||||
/// Currently selected service index (for navigation cursor)
|
/// Currently selected service index (for navigation cursor)
|
||||||
selected_index: usize,
|
pub selected_index: usize,
|
||||||
|
/// Scroll offset for viewport (which display line is at the top)
|
||||||
|
pub scroll_offset: usize,
|
||||||
|
/// Last rendered viewport height (for accurate scroll bounds)
|
||||||
|
last_viewport_height: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct ServiceInfo {
|
struct ServiceInfo {
|
||||||
status: String,
|
metrics: Vec<(String, f32, Option<String>)>, // (label, value, unit)
|
||||||
memory_mb: Option<f32>,
|
|
||||||
disk_gb: Option<f32>,
|
|
||||||
latency_ms: Option<f32>,
|
|
||||||
widget_status: Status,
|
widget_status: Status,
|
||||||
|
memory_bytes: Option<u64>,
|
||||||
|
restart_count: Option<u32>,
|
||||||
|
uptime_seconds: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ServicesWidget {
|
impl ServicesWidget {
|
||||||
@ -44,16 +115,17 @@ impl ServicesWidget {
|
|||||||
status: Status::Unknown,
|
status: Status::Unknown,
|
||||||
has_data: false,
|
has_data: false,
|
||||||
selected_index: 0,
|
selected_index: 0,
|
||||||
|
scroll_offset: 0,
|
||||||
|
last_viewport_height: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract service name and determine if it's a parent or sub-service
|
/// Extract service name and determine if it's a parent or sub-service
|
||||||
|
#[allow(dead_code)]
|
||||||
fn extract_service_info(metric_name: &str) -> Option<(String, Option<String>)> {
|
fn extract_service_info(metric_name: &str) -> Option<(String, Option<String>)> {
|
||||||
if metric_name.starts_with("service_") {
|
if metric_name.starts_with("service_") {
|
||||||
if let Some(end_pos) = metric_name
|
if let Some(end_pos) = metric_name
|
||||||
.rfind("_status")
|
.rfind("_status")
|
||||||
.or_else(|| metric_name.rfind("_memory_mb"))
|
|
||||||
.or_else(|| metric_name.rfind("_disk_gb"))
|
|
||||||
.or_else(|| metric_name.rfind("_latency_ms"))
|
.or_else(|| metric_name.rfind("_latency_ms"))
|
||||||
{
|
{
|
||||||
let service_part = &metric_name[8..end_pos]; // Remove "service_" prefix
|
let service_part = &metric_name[8..end_pos]; // Remove "service_" prefix
|
||||||
@ -76,196 +148,289 @@ impl ServicesWidget {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Format disk size with appropriate units (kB/MB/GB)
|
|
||||||
fn format_disk_size(size_gb: f32) -> String {
|
|
||||||
let size_mb = size_gb * 1024.0; // Convert GB to MB
|
|
||||||
|
|
||||||
if size_mb >= 1024.0 {
|
|
||||||
// Show as GB
|
|
||||||
format!("{:.1}GB", size_gb)
|
|
||||||
} else if size_mb >= 1.0 {
|
|
||||||
// Show as MB
|
|
||||||
format!("{:.0}MB", size_mb)
|
|
||||||
} else if size_mb >= 0.001 {
|
|
||||||
// Convert to kB
|
|
||||||
let size_kb = size_mb * 1024.0;
|
|
||||||
format!("{:.0}kB", size_kb)
|
|
||||||
} else {
|
|
||||||
// Show very small sizes as bytes
|
|
||||||
let size_bytes = size_mb * 1024.0 * 1024.0;
|
|
||||||
format!("{:.0}B", size_bytes)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Format parent service line - returns text without icon for span formatting
|
/// Format parent service line - returns text without icon for span formatting
|
||||||
fn format_parent_service_line(&self, name: &str, info: &ServiceInfo) -> String {
|
fn format_parent_service_line(&self, name: &str, info: &ServiceInfo, columns: ColumnVisibility) -> String {
|
||||||
let memory_str = info
|
// Truncate long service names to fit layout
|
||||||
.memory_mb
|
// NAME_WIDTH - 3 chars for "..." = max displayable chars
|
||||||
.map_or("0M".to_string(), |m| format!("{:.0}M", m));
|
let max_name_len = (ColumnVisibility::NAME_WIDTH - 3) as usize;
|
||||||
let disk_str = info
|
let short_name = if name.len() > max_name_len {
|
||||||
.disk_gb
|
format!("{}...", &name[..max_name_len.saturating_sub(3)])
|
||||||
.map_or("0".to_string(), |d| Self::format_disk_size(d));
|
|
||||||
|
|
||||||
// Truncate long service names to fit layout (account for icon space)
|
|
||||||
let short_name = if name.len() > 22 {
|
|
||||||
format!("{}...", &name[..19])
|
|
||||||
} else {
|
} else {
|
||||||
name.to_string()
|
name.to_string()
|
||||||
};
|
};
|
||||||
|
|
||||||
// Parent services always show active/inactive status
|
// Convert Status enum to display text
|
||||||
let status_str = match info.widget_status {
|
let status_str = match info.widget_status {
|
||||||
Status::Ok => "active".to_string(),
|
Status::Info => "", // Shouldn't happen for parent services
|
||||||
Status::Pending => "pending".to_string(),
|
Status::Ok => "active",
|
||||||
Status::Warning => "inactive".to_string(),
|
Status::Inactive => "inactive",
|
||||||
Status::Critical => "failed".to_string(),
|
Status::Critical => "failed",
|
||||||
Status::Unknown => "unknown".to_string(),
|
Status::Pending => "pending",
|
||||||
|
Status::Warning => "warning",
|
||||||
|
Status::Unknown => "unknown",
|
||||||
|
Status::Offline => "offline",
|
||||||
};
|
};
|
||||||
|
|
||||||
format!(
|
// Format memory
|
||||||
"{:<23} {:<10} {:<8} {:<8}",
|
let memory_str = info.memory_bytes.map_or("-".to_string(), |bytes| {
|
||||||
short_name, status_str, memory_str, disk_str
|
let mb = bytes as f64 / (1024.0 * 1024.0);
|
||||||
)
|
if mb >= 1000.0 {
|
||||||
}
|
format!("{:.1}G", mb / 1024.0)
|
||||||
|
} else {
|
||||||
/// Get status icon for service, considering command status for visual feedback
|
format!("{:.0}M", mb)
|
||||||
fn get_service_icon_and_status(&self, service_name: &str, info: &ServiceInfo, command_status: Option<&CommandStatus>) -> (String, String, ratatui::prelude::Color) {
|
|
||||||
// Check if this service is currently being operated on
|
|
||||||
if let Some(status) = command_status {
|
|
||||||
match status {
|
|
||||||
CommandStatus::InProgress { command_type, target, .. } => {
|
|
||||||
if target == service_name {
|
|
||||||
// Only show special icons for service commands
|
|
||||||
if let Some((icon, status_text)) = match command_type {
|
|
||||||
CommandType::ServiceRestart => Some(("↻", "restarting")),
|
|
||||||
CommandType::ServiceStart => Some(("↑", "starting")),
|
|
||||||
CommandType::ServiceStop => Some(("↓", "stopping")),
|
|
||||||
_ => None, // Don't handle non-service commands here
|
|
||||||
} {
|
|
||||||
return (icon.to_string(), status_text.to_string(), Theme::highlight());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {} // Success/Failed states will show normal status
|
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Format uptime
|
||||||
|
let uptime_str = info.uptime_seconds.map_or("-".to_string(), |secs| {
|
||||||
|
let days = secs / 86400;
|
||||||
|
let hours = (secs % 86400) / 3600;
|
||||||
|
let mins = (secs % 3600) / 60;
|
||||||
|
|
||||||
|
if days > 0 {
|
||||||
|
format!("{}d{}h", days, hours)
|
||||||
|
} else if hours > 0 {
|
||||||
|
format!("{}h{}m", hours, mins)
|
||||||
|
} else {
|
||||||
|
format!("{}m", mins)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Format restarts (show "!" if > 0 to indicate instability)
|
||||||
|
let restart_str = info.restart_count.map_or("-".to_string(), |count| {
|
||||||
|
if count > 0 {
|
||||||
|
format!("!{}", count)
|
||||||
|
} else {
|
||||||
|
"0".to_string()
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Build format string based on column visibility
|
||||||
|
let mut parts = Vec::new();
|
||||||
|
if columns.show_name {
|
||||||
|
parts.push(format!("{:<width$}", short_name, width = ColumnVisibility::NAME_WIDTH as usize));
|
||||||
}
|
}
|
||||||
|
if columns.show_status {
|
||||||
// Normal status display
|
parts.push(format!("{:<width$}", status_str, width = ColumnVisibility::STATUS_WIDTH as usize));
|
||||||
let icon = StatusIcons::get_icon(info.widget_status);
|
}
|
||||||
let status_color = match info.widget_status {
|
if columns.show_ram {
|
||||||
Status::Ok => Theme::success(),
|
parts.push(format!("{:<width$}", memory_str, width = ColumnVisibility::RAM_WIDTH as usize));
|
||||||
Status::Pending => Theme::highlight(),
|
}
|
||||||
Status::Warning => Theme::warning(),
|
if columns.show_uptime {
|
||||||
Status::Critical => Theme::error(),
|
parts.push(format!("{:<width$}", uptime_str, width = ColumnVisibility::UPTIME_WIDTH as usize));
|
||||||
Status::Unknown => Theme::muted_text(),
|
}
|
||||||
};
|
if columns.show_restarts {
|
||||||
|
parts.push(format!("{:<width$}", restart_str, width = ColumnVisibility::RESTARTS_WIDTH as usize));
|
||||||
(icon.to_string(), info.status.clone(), status_color)
|
}
|
||||||
|
|
||||||
|
parts.join(" ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Create spans for sub-service with icon next to name, considering command status
|
|
||||||
fn create_sub_service_spans_with_status(
|
/// Create spans for sub-service with icon next to name
|
||||||
|
fn create_sub_service_spans(
|
||||||
&self,
|
&self,
|
||||||
name: &str,
|
name: &str,
|
||||||
info: &ServiceInfo,
|
info: &ServiceInfo,
|
||||||
is_last: bool,
|
is_last: bool,
|
||||||
command_status: Option<&CommandStatus>,
|
|
||||||
) -> Vec<ratatui::text::Span<'static>> {
|
) -> Vec<ratatui::text::Span<'static>> {
|
||||||
|
// Informational sub-services (Status::Info) can use more width since they don't show columns
|
||||||
|
let max_width = if info.widget_status == Status::Info { 50 } else { 18 };
|
||||||
|
|
||||||
// Truncate long sub-service names to fit layout (accounting for indentation)
|
// Truncate long sub-service names to fit layout (accounting for indentation)
|
||||||
let short_name = if name.len() > 18 {
|
let short_name = if name.len() > max_width {
|
||||||
format!("{}...", &name[..15])
|
format!("{}...", &name[..(max_width.saturating_sub(3))])
|
||||||
} else {
|
} else {
|
||||||
name.to_string()
|
name.to_string()
|
||||||
};
|
};
|
||||||
|
|
||||||
// Get status icon and text, considering command status
|
// Get status icon and text
|
||||||
let (icon, mut status_str, status_color) = self.get_service_icon_and_status(name, info, command_status);
|
let icon = StatusIcons::get_icon(info.widget_status);
|
||||||
|
let status_color = match info.widget_status {
|
||||||
|
Status::Info => Theme::muted_text(),
|
||||||
|
Status::Ok => Theme::success(),
|
||||||
|
Status::Inactive => Theme::muted_text(),
|
||||||
|
Status::Pending => Theme::highlight(),
|
||||||
|
Status::Warning => Theme::warning(),
|
||||||
|
Status::Critical => Theme::error(),
|
||||||
|
Status::Unknown => Theme::muted_text(),
|
||||||
|
Status::Offline => Theme::muted_text(),
|
||||||
|
};
|
||||||
|
|
||||||
// For sub-services, prefer latency if available (unless command is in progress)
|
// Display metrics or status for sub-services
|
||||||
if command_status.is_none() {
|
let status_str = if !info.metrics.is_empty() {
|
||||||
if let Some(latency) = info.latency_ms {
|
// Show first metric with label and unit
|
||||||
status_str = if latency < 0.0 {
|
let (label, value, unit) = &info.metrics[0];
|
||||||
"timeout".to_string()
|
match unit {
|
||||||
} else {
|
Some(u) => format!("{}: {:.1} {}", label, value, u),
|
||||||
format!("{:.0}ms", latency)
|
None => format!("{}: {:.1}", label, value),
|
||||||
};
|
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
|
// Convert Status enum to display text for sub-services
|
||||||
|
match info.widget_status {
|
||||||
|
Status::Info => "",
|
||||||
|
Status::Ok => "active",
|
||||||
|
Status::Inactive => "inactive",
|
||||||
|
Status::Critical => "failed",
|
||||||
|
Status::Pending => "pending",
|
||||||
|
Status::Warning => "warning",
|
||||||
|
Status::Unknown => "unknown",
|
||||||
|
Status::Offline => "offline",
|
||||||
|
}.to_string()
|
||||||
|
};
|
||||||
let tree_symbol = if is_last { "└─" } else { "├─" };
|
let tree_symbol = if is_last { "└─" } else { "├─" };
|
||||||
|
|
||||||
vec![
|
if info.widget_status == Status::Info {
|
||||||
// Indentation and tree prefix
|
// Informational data - no status icon, show metrics if available
|
||||||
ratatui::text::Span::styled(
|
let mut spans = vec![
|
||||||
format!(" {} ", tree_symbol),
|
// Indentation and tree prefix
|
||||||
Typography::tree(),
|
ratatui::text::Span::styled(
|
||||||
),
|
format!(" {} ", tree_symbol),
|
||||||
// Status icon
|
Typography::tree(),
|
||||||
ratatui::text::Span::styled(
|
),
|
||||||
format!("{} ", icon),
|
// Service name (no icon) - no fixed width padding for Info status
|
||||||
Style::default().fg(status_color).bg(Theme::background()),
|
ratatui::text::Span::styled(
|
||||||
),
|
short_name,
|
||||||
// Service name
|
Style::default()
|
||||||
ratatui::text::Span::styled(
|
.fg(Theme::secondary_text())
|
||||||
format!("{:<18} ", short_name),
|
.bg(Theme::background()),
|
||||||
Style::default()
|
),
|
||||||
.fg(Theme::secondary_text())
|
];
|
||||||
.bg(Theme::background()),
|
|
||||||
),
|
// Add metrics if available (e.g., Docker image size)
|
||||||
// Status/latency text
|
if !status_str.is_empty() {
|
||||||
ratatui::text::Span::styled(
|
spans.push(ratatui::text::Span::styled(
|
||||||
status_str,
|
status_str,
|
||||||
Style::default()
|
Style::default()
|
||||||
.fg(Theme::secondary_text())
|
.fg(Theme::secondary_text())
|
||||||
.bg(Theme::background()),
|
.bg(Theme::background()),
|
||||||
),
|
));
|
||||||
]
|
}
|
||||||
|
|
||||||
|
spans
|
||||||
|
} else {
|
||||||
|
vec![
|
||||||
|
// Indentation and tree prefix
|
||||||
|
ratatui::text::Span::styled(
|
||||||
|
format!(" {} ", tree_symbol),
|
||||||
|
Typography::tree(),
|
||||||
|
),
|
||||||
|
// Status icon
|
||||||
|
ratatui::text::Span::styled(
|
||||||
|
format!("{} ", icon),
|
||||||
|
Style::default().fg(status_color).bg(Theme::background()),
|
||||||
|
),
|
||||||
|
// Service name
|
||||||
|
ratatui::text::Span::styled(
|
||||||
|
format!("{:<18} ", short_name),
|
||||||
|
Style::default()
|
||||||
|
.fg(Theme::secondary_text())
|
||||||
|
.bg(Theme::background()),
|
||||||
|
),
|
||||||
|
// Status/latency text
|
||||||
|
ratatui::text::Span::styled(
|
||||||
|
status_str,
|
||||||
|
Style::default()
|
||||||
|
.fg(Theme::secondary_text())
|
||||||
|
.bg(Theme::background()),
|
||||||
|
),
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Move selection up
|
/// Move selection up
|
||||||
pub fn select_previous(&mut self) {
|
pub fn select_previous(&mut self) {
|
||||||
if self.selected_index > 0 {
|
if self.selected_index > 0 {
|
||||||
self.selected_index -= 1;
|
self.selected_index -= 1;
|
||||||
|
self.ensure_selected_visible();
|
||||||
}
|
}
|
||||||
debug!("Service selection moved up to: {}", self.selected_index);
|
debug!("Service selection moved up to: {}", self.selected_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Move selection down
|
/// Move selection down
|
||||||
pub fn select_next(&mut self, total_services: usize) {
|
pub fn select_next(&mut self, total_services: usize) {
|
||||||
if total_services > 0 && self.selected_index < total_services.saturating_sub(1) {
|
if total_services > 0 && self.selected_index < total_services.saturating_sub(1) {
|
||||||
self.selected_index += 1;
|
self.selected_index += 1;
|
||||||
|
self.ensure_selected_visible();
|
||||||
}
|
}
|
||||||
debug!("Service selection: {}/{}", self.selected_index, total_services);
|
debug!("Service selection: {}/{}", self.selected_index, total_services);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get currently selected service name (for actions)
|
/// Convert parent service index to display line index
|
||||||
pub fn get_selected_service(&self) -> Option<String> {
|
fn parent_index_to_display_line(&self, parent_index: usize) -> usize {
|
||||||
// Build the same display list to find the selected service
|
|
||||||
let mut display_lines: Vec<(String, Status, bool, Option<(ServiceInfo, bool)>)> = Vec::new();
|
|
||||||
|
|
||||||
let mut parent_services: Vec<_> = self.parent_services.iter().collect();
|
let mut parent_services: Vec<_> = self.parent_services.iter().collect();
|
||||||
parent_services.sort_by(|(a, _), (b, _)| a.cmp(b));
|
parent_services.sort_by(|(a, _), (b, _)| a.cmp(b));
|
||||||
|
|
||||||
for (parent_name, parent_info) in parent_services {
|
let mut display_line = 0;
|
||||||
display_lines.push((parent_name.clone(), parent_info.widget_status, false, None));
|
for (idx, (parent_name, _)) in parent_services.iter().enumerate() {
|
||||||
|
if idx == parent_index {
|
||||||
if let Some(sub_list) = self.sub_services.get(parent_name) {
|
return display_line;
|
||||||
let mut sorted_subs = sub_list.clone();
|
}
|
||||||
sorted_subs.sort_by(|(a, _), (b, _)| a.cmp(b));
|
display_line += 1; // Parent service line
|
||||||
|
|
||||||
for (i, (sub_name, sub_info)) in sorted_subs.iter().enumerate() {
|
// Add sub-service lines
|
||||||
let is_last_sub = i == sorted_subs.len() - 1;
|
if let Some(sub_list) = self.sub_services.get(*parent_name) {
|
||||||
display_lines.push((
|
display_line += sub_list.len();
|
||||||
format!("{}_{}", parent_name, sub_name), // Use parent_sub format for sub-services
|
|
||||||
sub_info.widget_status,
|
|
||||||
true,
|
|
||||||
Some((sub_info.clone(), is_last_sub)),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
display_line
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Ensure the currently selected service is visible in the viewport
|
||||||
|
fn ensure_selected_visible(&mut self) {
|
||||||
|
if self.last_viewport_height == 0 {
|
||||||
|
return; // Can't adjust without knowing viewport size
|
||||||
|
}
|
||||||
|
|
||||||
|
let display_line = self.parent_index_to_display_line(self.selected_index);
|
||||||
|
let total_display_lines = self.get_total_display_lines();
|
||||||
|
let viewport_height = self.last_viewport_height;
|
||||||
|
|
||||||
|
// Check if selected line is above visible area
|
||||||
|
if display_line < self.scroll_offset {
|
||||||
|
self.scroll_offset = display_line;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate current effective viewport (accounting for "more below" if present)
|
||||||
|
let current_remaining = total_display_lines.saturating_sub(self.scroll_offset);
|
||||||
|
let current_has_more = current_remaining > viewport_height;
|
||||||
|
let current_effective = if current_has_more {
|
||||||
|
viewport_height.saturating_sub(1)
|
||||||
|
} else {
|
||||||
|
viewport_height
|
||||||
|
};
|
||||||
|
|
||||||
|
// Check if selected line is below current visible area
|
||||||
|
if display_line >= self.scroll_offset + current_effective {
|
||||||
|
// Need to scroll down. Position selected line so there's room for "more below" if needed
|
||||||
|
// Strategy: if there are lines below the selected line, don't put it at the very bottom
|
||||||
|
let has_content_below = display_line < total_display_lines - 1;
|
||||||
|
|
||||||
|
if has_content_below {
|
||||||
|
// Leave room for "... X more below" message by positioning selected line
|
||||||
|
// one position higher than the last line
|
||||||
|
let target_position = viewport_height.saturating_sub(2);
|
||||||
|
self.scroll_offset = display_line.saturating_sub(target_position);
|
||||||
|
} else {
|
||||||
|
// This is the last line, can put it at the bottom
|
||||||
|
self.scroll_offset = display_line.saturating_sub(viewport_height - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
debug!("Auto-scroll: selected={}, display_line={}, scroll_offset={}, viewport={}, total={}",
|
||||||
|
self.selected_index, display_line, self.scroll_offset, viewport_height, total_display_lines);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get currently selected service name (for actions)
|
||||||
|
/// Only returns parent service names since only parent services can be selected
|
||||||
|
pub fn get_selected_service(&self) -> Option<String> {
|
||||||
|
// Only parent services can be selected, so just get the parent service at selected_index
|
||||||
|
let mut parent_services: Vec<_> = self.parent_services.iter().collect();
|
||||||
|
parent_services.sort_by(|(a, _), (b, _)| a.cmp(b));
|
||||||
|
|
||||||
display_lines.get(self.selected_index).map(|(name, _, _, _)| name.clone())
|
parent_services.get(self.selected_index).map(|(name, _)| name.to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get total count of selectable services (parent services only, not sub-services)
|
/// Get total count of selectable services (parent services only, not sub-services)
|
||||||
@ -274,6 +439,82 @@ impl ServicesWidget {
|
|||||||
self.parent_services.len()
|
self.parent_services.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get total display lines (parent services + sub-services)
|
||||||
|
pub fn get_total_display_lines(&self) -> usize {
|
||||||
|
let mut total = self.parent_services.len();
|
||||||
|
for sub_list in self.sub_services.values() {
|
||||||
|
total += sub_list.len();
|
||||||
|
}
|
||||||
|
total
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Scroll down by one line
|
||||||
|
pub fn scroll_down(&mut self, _visible_height: usize) {
|
||||||
|
let total_lines = self.get_total_display_lines();
|
||||||
|
|
||||||
|
// Use last_viewport_height if available (more accurate), otherwise can't scroll
|
||||||
|
let viewport_height = if self.last_viewport_height > 0 {
|
||||||
|
self.last_viewport_height
|
||||||
|
} else {
|
||||||
|
return; // Can't scroll without knowing viewport size
|
||||||
|
};
|
||||||
|
|
||||||
|
// Calculate exact max scroll to match render logic
|
||||||
|
// Stop scrolling when all remaining content fits in viewport
|
||||||
|
// At scroll_offset N: remaining = total_lines - N
|
||||||
|
// We can show all when: remaining <= viewport_height
|
||||||
|
// So max_scroll is when: total_lines - max_scroll = viewport_height
|
||||||
|
// Therefore: max_scroll = total_lines - viewport_height (but at least 0)
|
||||||
|
let max_scroll = total_lines.saturating_sub(viewport_height);
|
||||||
|
|
||||||
|
debug!("Scroll down: total={}, viewport={}, offset={}, max={}", total_lines, viewport_height, self.scroll_offset, max_scroll);
|
||||||
|
|
||||||
|
if self.scroll_offset < max_scroll {
|
||||||
|
self.scroll_offset += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Scroll up by one line
|
||||||
|
pub fn scroll_up(&mut self) {
|
||||||
|
if self.scroll_offset > 0 {
|
||||||
|
self.scroll_offset -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Map a display line index to a parent service index (returns None if clicked on sub-service)
|
||||||
|
pub fn display_line_to_parent_index(&self, display_line_index: usize) -> Option<usize> {
|
||||||
|
// Build the same display list to map line index to parent service index
|
||||||
|
let mut parent_index = 0;
|
||||||
|
let mut line_index = 0;
|
||||||
|
|
||||||
|
let mut parent_services: Vec<_> = self.parent_services.iter().collect();
|
||||||
|
parent_services.sort_by(|(a, _), (b, _)| a.cmp(b));
|
||||||
|
|
||||||
|
for (parent_name, _) in parent_services {
|
||||||
|
// Check if this line index matches a parent service
|
||||||
|
if line_index == display_line_index {
|
||||||
|
return Some(parent_index);
|
||||||
|
}
|
||||||
|
line_index += 1;
|
||||||
|
|
||||||
|
// Add sub-services for this parent (if any)
|
||||||
|
if let Some(sub_list) = self.sub_services.get(parent_name) {
|
||||||
|
for _ in sub_list {
|
||||||
|
if line_index == display_line_index {
|
||||||
|
// Clicked on a sub-service - return None (can't select sub-services)
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
line_index += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
parent_index += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
/// Calculate which parent service index corresponds to a display line index
|
/// Calculate which parent service index corresponds to a display line index
|
||||||
fn calculate_parent_service_index(&self, display_line_index: &usize) -> usize {
|
fn calculate_parent_service_index(&self, display_line_index: &usize) -> usize {
|
||||||
// Build the same display list to map line index to parent service index
|
// Build the same display list to map line index to parent service index
|
||||||
@ -304,6 +545,61 @@ impl ServicesWidget {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Widget for ServicesWidget {
|
impl Widget for ServicesWidget {
|
||||||
|
fn update_from_agent_data(&mut self, agent_data: &cm_dashboard_shared::AgentData) {
|
||||||
|
self.has_data = true;
|
||||||
|
self.parent_services.clear();
|
||||||
|
self.sub_services.clear();
|
||||||
|
|
||||||
|
for service in &agent_data.services {
|
||||||
|
// Store parent service
|
||||||
|
let parent_info = ServiceInfo {
|
||||||
|
metrics: Vec::new(), // Parent services don't have custom metrics
|
||||||
|
widget_status: service.service_status,
|
||||||
|
memory_bytes: service.memory_bytes,
|
||||||
|
restart_count: service.restart_count,
|
||||||
|
uptime_seconds: service.uptime_seconds,
|
||||||
|
};
|
||||||
|
self.parent_services.insert(service.name.clone(), parent_info);
|
||||||
|
|
||||||
|
// Process sub-services if any
|
||||||
|
if !service.sub_services.is_empty() {
|
||||||
|
let mut sub_list = Vec::new();
|
||||||
|
for sub_service in &service.sub_services {
|
||||||
|
// Convert metrics to display format
|
||||||
|
let metrics: Vec<(String, f32, Option<String>)> = sub_service.metrics.iter()
|
||||||
|
.map(|m| (m.label.clone(), m.value, m.unit.clone()))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let sub_info = ServiceInfo {
|
||||||
|
metrics,
|
||||||
|
widget_status: sub_service.service_status,
|
||||||
|
memory_bytes: None, // Sub-services don't have individual metrics yet
|
||||||
|
restart_count: None,
|
||||||
|
uptime_seconds: None,
|
||||||
|
};
|
||||||
|
sub_list.push((sub_service.name.clone(), sub_info));
|
||||||
|
}
|
||||||
|
self.sub_services.insert(service.name.clone(), sub_list);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Aggregate status from all services
|
||||||
|
let mut all_statuses = Vec::new();
|
||||||
|
all_statuses.extend(self.parent_services.values().map(|info| info.widget_status));
|
||||||
|
for sub_list in self.sub_services.values() {
|
||||||
|
all_statuses.extend(sub_list.iter().map(|(_, info)| info.widget_status));
|
||||||
|
}
|
||||||
|
|
||||||
|
self.status = if all_statuses.is_empty() {
|
||||||
|
Status::Unknown
|
||||||
|
} else {
|
||||||
|
Status::aggregate(&all_statuses)
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ServicesWidget {
|
||||||
|
#[allow(dead_code)]
|
||||||
fn update_from_metrics(&mut self, metrics: &[&Metric]) {
|
fn update_from_metrics(&mut self, metrics: &[&Metric]) {
|
||||||
debug!("Services widget updating with {} metrics", metrics.len());
|
debug!("Services widget updating with {} metrics", metrics.len());
|
||||||
|
|
||||||
@ -319,24 +615,15 @@ impl Widget for ServicesWidget {
|
|||||||
self.parent_services
|
self.parent_services
|
||||||
.entry(parent_service)
|
.entry(parent_service)
|
||||||
.or_insert(ServiceInfo {
|
.or_insert(ServiceInfo {
|
||||||
status: "unknown".to_string(),
|
metrics: Vec::new(),
|
||||||
memory_mb: None,
|
|
||||||
disk_gb: None,
|
|
||||||
latency_ms: None,
|
|
||||||
widget_status: Status::Unknown,
|
widget_status: Status::Unknown,
|
||||||
|
memory_bytes: None,
|
||||||
|
restart_count: None,
|
||||||
|
uptime_seconds: None,
|
||||||
});
|
});
|
||||||
|
|
||||||
if metric.name.ends_with("_status") {
|
if metric.name.ends_with("_status") {
|
||||||
service_info.status = metric.value.as_string();
|
|
||||||
service_info.widget_status = metric.status;
|
service_info.widget_status = metric.status;
|
||||||
} else if metric.name.ends_with("_memory_mb") {
|
|
||||||
if let Some(memory) = metric.value.as_f32() {
|
|
||||||
service_info.memory_mb = Some(memory);
|
|
||||||
}
|
|
||||||
} else if metric.name.ends_with("_disk_gb") {
|
|
||||||
if let Some(disk) = metric.value.as_f32() {
|
|
||||||
service_info.disk_gb = Some(disk);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(sub_name) => {
|
Some(sub_name) => {
|
||||||
@ -356,32 +643,18 @@ impl Widget for ServicesWidget {
|
|||||||
sub_service_list.push((
|
sub_service_list.push((
|
||||||
sub_name.clone(),
|
sub_name.clone(),
|
||||||
ServiceInfo {
|
ServiceInfo {
|
||||||
status: "unknown".to_string(),
|
metrics: Vec::new(),
|
||||||
memory_mb: None,
|
|
||||||
disk_gb: None,
|
|
||||||
latency_ms: None,
|
|
||||||
widget_status: Status::Unknown,
|
widget_status: Status::Unknown,
|
||||||
|
memory_bytes: None,
|
||||||
|
restart_count: None,
|
||||||
|
uptime_seconds: None,
|
||||||
},
|
},
|
||||||
));
|
));
|
||||||
&mut sub_service_list.last_mut().unwrap().1
|
&mut sub_service_list.last_mut().unwrap().1
|
||||||
};
|
};
|
||||||
|
|
||||||
if metric.name.ends_with("_status") {
|
if metric.name.ends_with("_status") {
|
||||||
sub_service_info.status = metric.value.as_string();
|
|
||||||
sub_service_info.widget_status = metric.status;
|
sub_service_info.widget_status = metric.status;
|
||||||
} else if metric.name.ends_with("_memory_mb") {
|
|
||||||
if let Some(memory) = metric.value.as_f32() {
|
|
||||||
sub_service_info.memory_mb = Some(memory);
|
|
||||||
}
|
|
||||||
} else if metric.name.ends_with("_disk_gb") {
|
|
||||||
if let Some(disk) = metric.value.as_f32() {
|
|
||||||
sub_service_info.disk_gb = Some(disk);
|
|
||||||
}
|
|
||||||
} else if metric.name.ends_with("_latency_ms") {
|
|
||||||
if let Some(latency) = metric.value.as_f32() {
|
|
||||||
sub_service_info.latency_ms = Some(latency);
|
|
||||||
sub_service_info.widget_status = metric.status;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -413,12 +686,23 @@ impl Widget for ServicesWidget {
|
|||||||
self.selected_index = total_count - 1;
|
self.selected_index = total_count - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clamp scroll offset to valid range after update
|
||||||
|
// This prevents scroll issues when switching between hosts or when service count changes
|
||||||
|
let total_display_lines = self.get_total_display_lines();
|
||||||
|
if total_display_lines == 0 {
|
||||||
|
self.scroll_offset = 0;
|
||||||
|
} else if self.scroll_offset >= total_display_lines {
|
||||||
|
// Clamp to max valid value, not reset to 0
|
||||||
|
self.scroll_offset = total_display_lines.saturating_sub(1);
|
||||||
|
}
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
"Services widget updated: {} parent services, {} sub-service groups, total={}, selected={}, status={:?}",
|
"Services widget updated: {} parent services, {} sub-service groups, total={}, selected={}, scroll={}, status={:?}",
|
||||||
self.parent_services.len(),
|
self.parent_services.len(),
|
||||||
self.sub_services.len(),
|
self.sub_services.len(),
|
||||||
total_count,
|
total_count,
|
||||||
self.selected_index,
|
self.selected_index,
|
||||||
|
self.scroll_offset,
|
||||||
self.status
|
self.status
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -427,13 +711,9 @@ impl Widget for ServicesWidget {
|
|||||||
|
|
||||||
impl ServicesWidget {
|
impl ServicesWidget {
|
||||||
|
|
||||||
/// Render with focus, scroll, and command status for visual feedback
|
/// Render with focus
|
||||||
pub fn render_with_command_status(&mut self, frame: &mut Frame, area: Rect, is_focused: bool, scroll_offset: usize, command_status: Option<&CommandStatus>) {
|
pub fn render(&mut self, frame: &mut Frame, area: Rect, is_focused: bool) {
|
||||||
let services_block = if is_focused {
|
let services_block = Components::widget_block("services");
|
||||||
Components::focused_widget_block("services")
|
|
||||||
} else {
|
|
||||||
Components::widget_block("services")
|
|
||||||
};
|
|
||||||
let inner_area = services_block.inner(area);
|
let inner_area = services_block.inner(area);
|
||||||
frame.render_widget(services_block, area);
|
frame.render_widget(services_block, area);
|
||||||
|
|
||||||
@ -442,11 +722,28 @@ impl ServicesWidget {
|
|||||||
.constraints([Constraint::Length(1), Constraint::Min(0)])
|
.constraints([Constraint::Length(1), Constraint::Min(0)])
|
||||||
.split(inner_area);
|
.split(inner_area);
|
||||||
|
|
||||||
// Header
|
// Determine which columns to show based on available width
|
||||||
let header = format!(
|
let columns = ColumnVisibility::from_width(inner_area.width);
|
||||||
"{:<25} {:<10} {:<8} {:<8}",
|
|
||||||
"Service:", "Status:", "RAM:", "Disk:"
|
// Build header based on visible columns
|
||||||
);
|
let mut header_parts = Vec::new();
|
||||||
|
if columns.show_name {
|
||||||
|
header_parts.push(format!("{:<width$}", "Service:", width = ColumnVisibility::NAME_WIDTH as usize));
|
||||||
|
}
|
||||||
|
if columns.show_status {
|
||||||
|
header_parts.push(format!("{:<width$}", "Status:", width = ColumnVisibility::STATUS_WIDTH as usize));
|
||||||
|
}
|
||||||
|
if columns.show_ram {
|
||||||
|
header_parts.push(format!("{:<width$}", "RAM:", width = ColumnVisibility::RAM_WIDTH as usize));
|
||||||
|
}
|
||||||
|
if columns.show_uptime {
|
||||||
|
header_parts.push(format!("{:<width$}", "Uptime:", width = ColumnVisibility::UPTIME_WIDTH as usize));
|
||||||
|
}
|
||||||
|
if columns.show_restarts {
|
||||||
|
header_parts.push(format!("{:<width$}", "↻:", width = ColumnVisibility::RESTARTS_WIDTH as usize));
|
||||||
|
}
|
||||||
|
let header = header_parts.join(" ");
|
||||||
|
|
||||||
let header_para = Paragraph::new(header).style(Typography::muted());
|
let header_para = Paragraph::new(header).style(Typography::muted());
|
||||||
frame.render_widget(header_para, content_chunks[0]);
|
frame.render_widget(header_para, content_chunks[0]);
|
||||||
|
|
||||||
@ -457,13 +754,13 @@ impl ServicesWidget {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use the existing render logic but with command status
|
// Render the services list
|
||||||
self.render_services_with_status(frame, content_chunks[1], is_focused, scroll_offset, command_status);
|
self.render_services(frame, content_chunks[1], is_focused, columns);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Render services list with command status awareness
|
/// Render services list
|
||||||
fn render_services_with_status(&mut self, frame: &mut Frame, area: Rect, is_focused: bool, scroll_offset: usize, command_status: Option<&CommandStatus>) {
|
fn render_services(&mut self, frame: &mut Frame, area: Rect, is_focused: bool, columns: ColumnVisibility) {
|
||||||
// Build hierarchical service list for display (same as existing logic)
|
// Build hierarchical service list for display
|
||||||
let mut display_lines: Vec<(String, Status, bool, Option<(ServiceInfo, bool)>)> = Vec::new();
|
let mut display_lines: Vec<(String, Status, bool, Option<(ServiceInfo, bool)>)> = Vec::new();
|
||||||
|
|
||||||
// Sort parent services alphabetically for consistent order
|
// Sort parent services alphabetically for consistent order
|
||||||
@ -472,8 +769,8 @@ impl ServicesWidget {
|
|||||||
|
|
||||||
for (parent_name, parent_info) in parent_services {
|
for (parent_name, parent_info) in parent_services {
|
||||||
// Add parent service line
|
// Add parent service line
|
||||||
let parent_line = self.format_parent_service_line(parent_name, parent_info);
|
let parent_line = self.format_parent_service_line(parent_name, parent_info, columns);
|
||||||
display_lines.push((parent_line, parent_info.widget_status, false, None)); // false = not sub-service
|
display_lines.push((parent_line, parent_info.widget_status, false, None));
|
||||||
|
|
||||||
// Add sub-services for this parent (if any)
|
// Add sub-services for this parent (if any)
|
||||||
if let Some(sub_list) = self.sub_services.get(parent_name) {
|
if let Some(sub_list) = self.sub_services.get(parent_name) {
|
||||||
@ -494,37 +791,64 @@ impl ServicesWidget {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply scroll offset and render visible lines (same as existing logic)
|
// Show only what fits, with "X more below" if needed
|
||||||
let available_lines = area.height as usize;
|
let available_lines = area.height as usize;
|
||||||
let total_lines = display_lines.len();
|
let total_lines = display_lines.len();
|
||||||
|
|
||||||
// Calculate scroll boundaries
|
// Store viewport height for accurate scroll calculations
|
||||||
let max_scroll = if total_lines > available_lines {
|
self.last_viewport_height = available_lines;
|
||||||
total_lines - available_lines
|
|
||||||
|
// Clamp scroll_offset to valid range based on current viewport and content
|
||||||
|
// This handles dynamic viewport size changes
|
||||||
|
let max_valid_scroll = total_lines.saturating_sub(available_lines);
|
||||||
|
if self.scroll_offset > max_valid_scroll {
|
||||||
|
self.scroll_offset = max_valid_scroll;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate how many lines remain after scroll offset
|
||||||
|
let remaining_lines = total_lines.saturating_sub(self.scroll_offset);
|
||||||
|
|
||||||
|
debug!("Render: total={}, viewport={}, offset={}, max={}, remaining={}",
|
||||||
|
total_lines, available_lines, self.scroll_offset, max_valid_scroll, remaining_lines);
|
||||||
|
|
||||||
|
// Check if all remaining content fits in viewport
|
||||||
|
let will_show_more_below = remaining_lines > available_lines;
|
||||||
|
|
||||||
|
// Reserve one line for "X more below" only if we can't fit everything
|
||||||
|
let lines_for_content = if will_show_more_below {
|
||||||
|
available_lines.saturating_sub(1)
|
||||||
} else {
|
} else {
|
||||||
total_lines.saturating_sub(1)
|
available_lines.min(remaining_lines)
|
||||||
};
|
};
|
||||||
let effective_scroll = scroll_offset.min(max_scroll);
|
|
||||||
|
// Apply scroll offset
|
||||||
// Get visible lines after scrolling
|
|
||||||
let visible_lines: Vec<_> = display_lines
|
let visible_lines: Vec<_> = display_lines
|
||||||
.iter()
|
.iter()
|
||||||
.skip(effective_scroll)
|
.skip(self.scroll_offset)
|
||||||
.take(available_lines)
|
.take(lines_for_content)
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
// Only calculate hidden_below if we actually reserved space for the message
|
||||||
|
let hidden_below = if will_show_more_below {
|
||||||
|
remaining_lines.saturating_sub(lines_for_content)
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
let lines_to_show = visible_lines.len();
|
let lines_to_show = visible_lines.len();
|
||||||
|
|
||||||
if lines_to_show > 0 {
|
if lines_to_show > 0 {
|
||||||
|
// Add space for "X more below" message if needed
|
||||||
|
let total_chunks_needed = if hidden_below > 0 { lines_to_show + 1 } else { lines_to_show };
|
||||||
let service_chunks = Layout::default()
|
let service_chunks = Layout::default()
|
||||||
.direction(Direction::Vertical)
|
.direction(Direction::Vertical)
|
||||||
.constraints(vec![Constraint::Length(1); lines_to_show])
|
.constraints(vec![Constraint::Length(1); total_chunks_needed])
|
||||||
.split(area);
|
.split(area);
|
||||||
|
|
||||||
for (i, (line_text, line_status, is_sub, sub_info)) in visible_lines.iter().enumerate()
|
for (i, (line_text, line_status, is_sub, sub_info)) in visible_lines.iter().enumerate()
|
||||||
{
|
{
|
||||||
let actual_index = effective_scroll + i; // Real index in the full list
|
let actual_index = self.scroll_offset + i; // Account for scroll offset
|
||||||
|
|
||||||
// Only parent services can be selected - calculate parent service index
|
// Only parent services can be selected - calculate parent service index
|
||||||
let is_selected = if !*is_sub {
|
let is_selected = if !*is_sub {
|
||||||
// This is a parent service - count how many parent services came before this one
|
// This is a parent service - count how many parent services came before this one
|
||||||
@ -535,47 +859,23 @@ impl ServicesWidget {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let mut spans = if *is_sub && sub_info.is_some() {
|
let mut spans = if *is_sub && sub_info.is_some() {
|
||||||
// Use custom sub-service span creation WITH command status
|
// Use custom sub-service span creation
|
||||||
let (service_info, is_last) = sub_info.as_ref().unwrap();
|
let (service_info, is_last) = sub_info.as_ref().unwrap();
|
||||||
self.create_sub_service_spans_with_status(line_text, service_info, *is_last, command_status)
|
self.create_sub_service_spans(line_text, service_info, *is_last)
|
||||||
} else {
|
} else {
|
||||||
// Parent services - check if this parent service has a command in progress
|
// Parent services - use normal status spans
|
||||||
let service_spans = if let Some(status) = command_status {
|
StatusIcons::create_status_spans(*line_status, line_text)
|
||||||
match status {
|
|
||||||
CommandStatus::InProgress { target, .. } => {
|
|
||||||
if target == line_text {
|
|
||||||
// Create spans with progress status
|
|
||||||
let (icon, status_text, status_color) = self.get_service_icon_and_status(line_text, &ServiceInfo {
|
|
||||||
status: "".to_string(),
|
|
||||||
memory_mb: None,
|
|
||||||
disk_gb: None,
|
|
||||||
latency_ms: None,
|
|
||||||
widget_status: *line_status
|
|
||||||
}, command_status);
|
|
||||||
vec![
|
|
||||||
ratatui::text::Span::styled(format!("{} ", icon), Style::default().fg(status_color)),
|
|
||||||
ratatui::text::Span::styled(line_text.clone(), Style::default().fg(Theme::primary_text())),
|
|
||||||
ratatui::text::Span::styled(format!(" {}", status_text), Style::default().fg(status_color)),
|
|
||||||
]
|
|
||||||
} else {
|
|
||||||
StatusIcons::create_status_spans(*line_status, line_text)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => StatusIcons::create_status_spans(*line_status, line_text)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
StatusIcons::create_status_spans(*line_status, line_text)
|
|
||||||
};
|
|
||||||
service_spans
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Apply selection highlighting to parent services only, preserving status icon color
|
// Apply selection highlighting to parent services only
|
||||||
// Only show selection when Services panel is focused
|
// Only show selection when Services panel is focused
|
||||||
if is_selected && !*is_sub && is_focused {
|
if is_selected && !*is_sub && is_focused {
|
||||||
for (i, span) in spans.iter_mut().enumerate() {
|
for (i, span) in spans.iter_mut().enumerate() {
|
||||||
if i == 0 {
|
if i == 0 {
|
||||||
// First span is the status icon - preserve its color
|
// First span is the status icon - use background color for visibility against blue selection
|
||||||
span.style = span.style.bg(Theme::highlight());
|
span.style = span.style
|
||||||
|
.bg(Theme::highlight())
|
||||||
|
.fg(Theme::background());
|
||||||
} else {
|
} else {
|
||||||
// Other spans (text) get full selection highlighting
|
// Other spans (text) get full selection highlighting
|
||||||
span.style = span.style
|
span.style = span.style
|
||||||
@ -589,33 +889,12 @@ impl ServicesWidget {
|
|||||||
|
|
||||||
frame.render_widget(service_para, service_chunks[i]);
|
frame.render_widget(service_para, service_chunks[i]);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Show scroll indicator if there are more services than we can display (same as existing)
|
|
||||||
if total_lines > available_lines {
|
|
||||||
let hidden_above = effective_scroll;
|
|
||||||
let hidden_below = total_lines.saturating_sub(effective_scroll + available_lines);
|
|
||||||
|
|
||||||
if hidden_above > 0 || hidden_below > 0 {
|
// Show "X more below" message if content was truncated
|
||||||
let scroll_text = if hidden_above > 0 && hidden_below > 0 {
|
if hidden_below > 0 {
|
||||||
format!("... {} above, {} below", hidden_above, hidden_below)
|
let more_text = format!("... {} more below", hidden_below);
|
||||||
} else if hidden_above > 0 {
|
let more_para = Paragraph::new(more_text).style(Style::default().fg(Theme::border()));
|
||||||
format!("... {} more above", hidden_above)
|
frame.render_widget(more_para, service_chunks[lines_to_show]);
|
||||||
} else {
|
|
||||||
format!("... {} more below", hidden_below)
|
|
||||||
};
|
|
||||||
|
|
||||||
if available_lines > 0 && lines_to_show > 0 {
|
|
||||||
let last_line_area = Rect {
|
|
||||||
x: area.x,
|
|
||||||
y: area.y + (lines_to_show - 1) as u16,
|
|
||||||
width: area.width,
|
|
||||||
height: 1,
|
|
||||||
};
|
|
||||||
|
|
||||||
let scroll_para = Paragraph::new(scroll_text).style(Typography::muted());
|
|
||||||
frame.render_widget(scroll_para, last_line_area);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,28 +1,31 @@
|
|||||||
use cm_dashboard_shared::{Metric, MetricValue, Status};
|
use cm_dashboard_shared::Status;
|
||||||
use ratatui::{
|
use ratatui::{
|
||||||
layout::Rect,
|
layout::Rect,
|
||||||
|
style::Style,
|
||||||
text::{Line, Span, Text},
|
text::{Line, Span, Text},
|
||||||
widgets::Paragraph,
|
widgets::Paragraph,
|
||||||
Frame,
|
Frame,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::Widget;
|
use crate::ui::theme::{StatusIcons, Theme, Typography};
|
||||||
use crate::ui::theme::{StatusIcons, Typography};
|
|
||||||
|
|
||||||
/// System widget displaying NixOS info, CPU, RAM, and Storage in unified layout
|
/// System widget displaying NixOS info, Network, CPU, RAM, and Storage in unified layout
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct SystemWidget {
|
pub struct SystemWidget {
|
||||||
// NixOS information
|
// NixOS information
|
||||||
nixos_build: Option<String>,
|
nixos_build: Option<String>,
|
||||||
config_hash: Option<String>,
|
|
||||||
active_users: Option<String>,
|
|
||||||
agent_hash: Option<String>,
|
agent_hash: Option<String>,
|
||||||
|
|
||||||
|
// Network interfaces
|
||||||
|
network_interfaces: Vec<cm_dashboard_shared::NetworkInterfaceData>,
|
||||||
|
|
||||||
// CPU metrics
|
// CPU metrics
|
||||||
cpu_load_1min: Option<f32>,
|
cpu_load_1min: Option<f32>,
|
||||||
cpu_load_5min: Option<f32>,
|
cpu_load_5min: Option<f32>,
|
||||||
cpu_load_15min: Option<f32>,
|
cpu_load_15min: Option<f32>,
|
||||||
cpu_frequency: Option<f32>,
|
cpu_cstates: Vec<cm_dashboard_shared::CStateInfo>,
|
||||||
|
cpu_model_name: Option<String>,
|
||||||
|
cpu_core_count: Option<u32>,
|
||||||
cpu_status: Status,
|
cpu_status: Status,
|
||||||
|
|
||||||
// Memory metrics
|
// Memory metrics
|
||||||
@ -33,20 +36,36 @@ pub struct SystemWidget {
|
|||||||
tmp_used_gb: Option<f32>,
|
tmp_used_gb: Option<f32>,
|
||||||
tmp_total_gb: Option<f32>,
|
tmp_total_gb: Option<f32>,
|
||||||
memory_status: Status,
|
memory_status: Status,
|
||||||
|
tmp_status: Status,
|
||||||
|
/// All tmpfs mounts (for auto-discovery support)
|
||||||
|
tmpfs_mounts: Vec<cm_dashboard_shared::TmpfsData>,
|
||||||
|
|
||||||
// Storage metrics (collected from disk metrics)
|
// Storage metrics (collected from disk metrics)
|
||||||
storage_pools: Vec<StoragePool>,
|
storage_pools: Vec<StoragePool>,
|
||||||
|
|
||||||
|
// Backup metrics
|
||||||
|
backup_last_time: Option<String>,
|
||||||
|
backup_status: Status,
|
||||||
|
backup_repositories: Vec<cm_dashboard_shared::BackupRepositoryData>,
|
||||||
|
|
||||||
// Overall status
|
// Overall status
|
||||||
has_data: bool,
|
has_data: bool,
|
||||||
|
|
||||||
|
// Scroll offset for viewport
|
||||||
|
pub scroll_offset: usize,
|
||||||
|
/// Last rendered viewport height (for accurate scroll bounds)
|
||||||
|
last_viewport_height: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct StoragePool {
|
struct StoragePool {
|
||||||
name: String,
|
name: String,
|
||||||
mount_point: String,
|
mount_point: String,
|
||||||
pool_type: String, // "Single", "Raid0", etc.
|
pool_type: String, // "single", "mergerfs (2+1)", "RAID5 (3+1)", etc.
|
||||||
drives: Vec<StorageDrive>,
|
drives: Vec<StorageDrive>, // For physical drives
|
||||||
|
data_drives: Vec<StorageDrive>, // For MergerFS pools
|
||||||
|
parity_drives: Vec<StorageDrive>, // For MergerFS pools
|
||||||
|
filesystems: Vec<FileSystem>, // For physical drive pools: individual filesystem children
|
||||||
usage_percent: Option<f32>,
|
usage_percent: Option<f32>,
|
||||||
used_gb: Option<f32>,
|
used_gb: Option<f32>,
|
||||||
total_gb: Option<f32>,
|
total_gb: Option<f32>,
|
||||||
@ -61,17 +80,27 @@ struct StorageDrive {
|
|||||||
status: Status,
|
status: Status,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct FileSystem {
|
||||||
|
mount_point: String,
|
||||||
|
usage_percent: Option<f32>,
|
||||||
|
used_gb: Option<f32>,
|
||||||
|
total_gb: Option<f32>,
|
||||||
|
status: Status,
|
||||||
|
}
|
||||||
|
|
||||||
impl SystemWidget {
|
impl SystemWidget {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
nixos_build: None,
|
nixos_build: None,
|
||||||
config_hash: None,
|
|
||||||
active_users: None,
|
|
||||||
agent_hash: None,
|
agent_hash: None,
|
||||||
|
network_interfaces: Vec::new(),
|
||||||
cpu_load_1min: None,
|
cpu_load_1min: None,
|
||||||
cpu_load_5min: None,
|
cpu_load_5min: None,
|
||||||
cpu_load_15min: None,
|
cpu_load_15min: None,
|
||||||
cpu_frequency: None,
|
cpu_cstates: Vec::new(),
|
||||||
|
cpu_model_name: None,
|
||||||
|
cpu_core_count: None,
|
||||||
cpu_status: Status::Unknown,
|
cpu_status: Status::Unknown,
|
||||||
memory_usage_percent: None,
|
memory_usage_percent: None,
|
||||||
memory_used_gb: None,
|
memory_used_gb: None,
|
||||||
@ -80,8 +109,15 @@ impl SystemWidget {
|
|||||||
tmp_used_gb: None,
|
tmp_used_gb: None,
|
||||||
tmp_total_gb: None,
|
tmp_total_gb: None,
|
||||||
memory_status: Status::Unknown,
|
memory_status: Status::Unknown,
|
||||||
|
tmp_status: Status::Unknown,
|
||||||
|
tmpfs_mounts: Vec::new(),
|
||||||
storage_pools: Vec::new(),
|
storage_pools: Vec::new(),
|
||||||
|
backup_last_time: None,
|
||||||
|
backup_status: Status::Unknown,
|
||||||
|
backup_repositories: Vec::new(),
|
||||||
has_data: false,
|
has_data: false,
|
||||||
|
scroll_offset: 0,
|
||||||
|
last_viewport_height: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -95,12 +131,19 @@ impl SystemWidget {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Format CPU frequency
|
/// Format CPU C-states (idle depth) with percentages
|
||||||
fn format_cpu_frequency(&self) -> String {
|
fn format_cpu_cstate(&self) -> String {
|
||||||
match self.cpu_frequency {
|
if self.cpu_cstates.is_empty() {
|
||||||
Some(freq) => format!("{:.0} MHz", freq),
|
return "—".to_string();
|
||||||
None => "— MHz".to_string(),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Format top 3 C-states with percentages: "C10:79% C8:10% C6:8%"
|
||||||
|
// Agent already sends clean names (C3, C10, etc.)
|
||||||
|
self.cpu_cstates
|
||||||
|
.iter()
|
||||||
|
.map(|cs| format!("{}:{:.0}%", cs.name, cs.percent))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join(" ")
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Format memory usage
|
/// Format memory usage
|
||||||
@ -113,203 +156,361 @@ impl SystemWidget {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Format /tmp usage
|
|
||||||
fn format_tmp_usage(&self) -> String {
|
|
||||||
match (self.tmp_usage_percent, self.tmp_used_gb, self.tmp_total_gb) {
|
|
||||||
(Some(pct), Some(used), Some(total)) => {
|
|
||||||
let used_str = if used < 0.1 {
|
|
||||||
format!("{:.0}B", used * 1024.0) // Show as MB if very small
|
|
||||||
} else {
|
|
||||||
format!("{:.1}GB", used)
|
|
||||||
};
|
|
||||||
format!("{:.0}% {}/{:.1}GB", pct, used_str, total)
|
|
||||||
}
|
|
||||||
_ => "—% —GB/—GB".to_string(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get the current agent hash for rebuild completion detection
|
/// Get the current agent hash for rebuild completion detection
|
||||||
pub fn get_agent_hash(&self) -> Option<&String> {
|
pub fn _get_agent_hash(&self) -> Option<&String> {
|
||||||
self.agent_hash.as_ref()
|
self.agent_hash.as_ref()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get mount point for a pool name
|
/// Get the build version
|
||||||
fn get_mount_point_for_pool(&self, pool_name: &str) -> String {
|
pub fn get_build_version(&self) -> Option<String> {
|
||||||
match pool_name {
|
self.nixos_build.clone()
|
||||||
"root" => "/".to_string(),
|
|
||||||
"steampool" => "/mnt/steampool".to_string(),
|
|
||||||
"steampool_1" => "/steampool_1".to_string(),
|
|
||||||
"steampool_2" => "/steampool_2".to_string(),
|
|
||||||
_ => format!("/{}", pool_name), // Default fallback
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse storage metrics into pools and drives
|
/// Get the agent version
|
||||||
fn update_storage_from_metrics(&mut self, metrics: &[&Metric]) {
|
pub fn get_agent_version(&self) -> Option<String> {
|
||||||
let mut pools: std::collections::HashMap<String, StoragePool> = std::collections::HashMap::new();
|
self.agent_hash.clone()
|
||||||
|
}
|
||||||
for metric in metrics {
|
}
|
||||||
if metric.name.starts_with("disk_") {
|
|
||||||
if let Some(pool_name) = self.extract_pool_name(&metric.name) {
|
|
||||||
let mount_point = self.get_mount_point_for_pool(&pool_name);
|
|
||||||
let pool = pools.entry(pool_name.clone()).or_insert_with(|| StoragePool {
|
|
||||||
name: pool_name.clone(),
|
|
||||||
mount_point: mount_point.clone(),
|
|
||||||
pool_type: "Single".to_string(), // Default, could be enhanced
|
|
||||||
drives: Vec::new(),
|
|
||||||
usage_percent: None,
|
|
||||||
used_gb: None,
|
|
||||||
total_gb: None,
|
|
||||||
status: Status::Unknown,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Parse different metric types
|
use super::Widget;
|
||||||
if metric.name.contains("_usage_percent") {
|
|
||||||
if let MetricValue::Float(usage) = metric.value {
|
impl Widget for SystemWidget {
|
||||||
pool.usage_percent = Some(usage);
|
fn update_from_agent_data(&mut self, agent_data: &cm_dashboard_shared::AgentData) {
|
||||||
pool.status = metric.status.clone();
|
self.has_data = true;
|
||||||
}
|
|
||||||
} else if metric.name.contains("_used_gb") {
|
// Extract agent version
|
||||||
if let MetricValue::Float(used) = metric.value {
|
self.agent_hash = Some(agent_data.agent_version.clone());
|
||||||
pool.used_gb = Some(used);
|
|
||||||
}
|
// Extract build version
|
||||||
} else if metric.name.contains("_total_gb") {
|
self.nixos_build = agent_data.build_version.clone();
|
||||||
if let MetricValue::Float(total) = metric.value {
|
|
||||||
pool.total_gb = Some(total);
|
// Extract network interfaces
|
||||||
}
|
self.network_interfaces = agent_data.system.network.interfaces.clone();
|
||||||
} else if metric.name.contains("_temperature") {
|
|
||||||
if let Some(drive_name) = self.extract_drive_name(&metric.name) {
|
// Extract CPU data directly
|
||||||
// Find existing drive or create new one
|
let cpu = &agent_data.system.cpu;
|
||||||
let drive_exists = pool.drives.iter().any(|d| d.name == drive_name);
|
self.cpu_load_1min = Some(cpu.load_1min);
|
||||||
if !drive_exists {
|
self.cpu_load_5min = Some(cpu.load_5min);
|
||||||
pool.drives.push(StorageDrive {
|
self.cpu_load_15min = Some(cpu.load_15min);
|
||||||
name: drive_name.clone(),
|
self.cpu_cstates = cpu.cstates.clone();
|
||||||
temperature: None,
|
self.cpu_model_name = cpu.model_name.clone();
|
||||||
wear_percent: None,
|
self.cpu_core_count = cpu.core_count;
|
||||||
status: Status::Unknown,
|
self.cpu_status = Status::Ok;
|
||||||
});
|
|
||||||
}
|
// Extract memory data directly
|
||||||
|
let memory = &agent_data.system.memory;
|
||||||
if let Some(drive) = pool.drives.iter_mut().find(|d| d.name == drive_name) {
|
self.memory_usage_percent = Some(memory.usage_percent);
|
||||||
if let MetricValue::Float(temp) = metric.value {
|
self.memory_used_gb = Some(memory.used_gb);
|
||||||
drive.temperature = Some(temp);
|
self.memory_total_gb = Some(memory.total_gb);
|
||||||
drive.status = metric.status.clone();
|
self.memory_status = Status::Ok;
|
||||||
}
|
|
||||||
}
|
// Store all tmpfs mounts for display
|
||||||
}
|
self.tmpfs_mounts = memory.tmpfs.clone();
|
||||||
} else if metric.name.contains("_wear_percent") {
|
|
||||||
if let Some(drive_name) = self.extract_drive_name(&metric.name) {
|
// Extract tmpfs data (maintain backward compatibility for /tmp)
|
||||||
// Find existing drive or create new one
|
if let Some(tmp_data) = memory.tmpfs.iter().find(|t| t.mount == "/tmp") {
|
||||||
let drive_exists = pool.drives.iter().any(|d| d.name == drive_name);
|
self.tmp_usage_percent = Some(tmp_data.usage_percent);
|
||||||
if !drive_exists {
|
self.tmp_used_gb = Some(tmp_data.used_gb);
|
||||||
pool.drives.push(StorageDrive {
|
self.tmp_total_gb = Some(tmp_data.total_gb);
|
||||||
name: drive_name.clone(),
|
self.tmp_status = Status::Ok;
|
||||||
temperature: None,
|
|
||||||
wear_percent: None,
|
|
||||||
status: Status::Unknown,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(drive) = pool.drives.iter_mut().find(|d| d.name == drive_name) {
|
|
||||||
if let MetricValue::Float(wear) = metric.value {
|
|
||||||
drive.wear_percent = Some(wear);
|
|
||||||
drive.status = metric.status.clone();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert to sorted vec for consistent ordering
|
// Convert storage data to internal format
|
||||||
|
self.update_storage_from_agent_data(agent_data);
|
||||||
|
|
||||||
|
// Extract backup data
|
||||||
|
let backup = &agent_data.backup;
|
||||||
|
self.backup_last_time = backup.last_backup_time.clone();
|
||||||
|
self.backup_status = backup.backup_status;
|
||||||
|
self.backup_repositories = backup.repositories.clone();
|
||||||
|
|
||||||
|
// Clamp scroll offset to valid range after update
|
||||||
|
// This prevents scroll issues when switching between hosts
|
||||||
|
let total_lines = self.get_total_lines();
|
||||||
|
if total_lines == 0 {
|
||||||
|
self.scroll_offset = 0;
|
||||||
|
} else if self.scroll_offset >= total_lines {
|
||||||
|
// Clamp to max valid value, not reset to 0
|
||||||
|
self.scroll_offset = total_lines.saturating_sub(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SystemWidget {
|
||||||
|
/// Convert structured storage data to internal format
|
||||||
|
fn update_storage_from_agent_data(&mut self, agent_data: &cm_dashboard_shared::AgentData) {
|
||||||
|
let mut pools: std::collections::HashMap<String, StoragePool> = std::collections::HashMap::new();
|
||||||
|
|
||||||
|
// Convert drives
|
||||||
|
for drive in &agent_data.system.storage.drives {
|
||||||
|
let mut pool = StoragePool {
|
||||||
|
name: drive.name.clone(),
|
||||||
|
mount_point: drive.name.clone(),
|
||||||
|
pool_type: "drive".to_string(),
|
||||||
|
drives: Vec::new(),
|
||||||
|
data_drives: Vec::new(),
|
||||||
|
parity_drives: Vec::new(),
|
||||||
|
filesystems: Vec::new(),
|
||||||
|
usage_percent: None,
|
||||||
|
used_gb: None,
|
||||||
|
total_gb: None,
|
||||||
|
status: Status::Ok,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add drive info
|
||||||
|
let display_name = drive.serial_number.as_ref()
|
||||||
|
.map(|s| truncate_serial(s))
|
||||||
|
.unwrap_or(drive.name.clone());
|
||||||
|
let storage_drive = StorageDrive {
|
||||||
|
name: display_name,
|
||||||
|
temperature: drive.temperature_celsius,
|
||||||
|
wear_percent: drive.wear_percent,
|
||||||
|
status: Status::Ok,
|
||||||
|
};
|
||||||
|
pool.drives.push(storage_drive);
|
||||||
|
|
||||||
|
// Calculate totals from filesystems
|
||||||
|
let total_used: f32 = drive.filesystems.iter().map(|fs| fs.used_gb).sum();
|
||||||
|
let total_size: f32 = drive.filesystems.iter().map(|fs| fs.total_gb).sum();
|
||||||
|
let average_usage = if total_size > 0.0 { (total_used / total_size) * 100.0 } else { 0.0 };
|
||||||
|
|
||||||
|
pool.usage_percent = Some(average_usage);
|
||||||
|
pool.used_gb = Some(total_used);
|
||||||
|
pool.total_gb = Some(total_size);
|
||||||
|
|
||||||
|
// Add filesystems
|
||||||
|
for fs in &drive.filesystems {
|
||||||
|
let filesystem = FileSystem {
|
||||||
|
mount_point: fs.mount.clone(),
|
||||||
|
usage_percent: Some(fs.usage_percent),
|
||||||
|
used_gb: Some(fs.used_gb),
|
||||||
|
total_gb: Some(fs.total_gb),
|
||||||
|
status: Status::Ok,
|
||||||
|
};
|
||||||
|
pool.filesystems.push(filesystem);
|
||||||
|
}
|
||||||
|
|
||||||
|
pools.insert(drive.name.clone(), pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert pools (MergerFS, RAID, etc.)
|
||||||
|
for pool in &agent_data.system.storage.pools {
|
||||||
|
// Use agent-calculated status (combined health and usage status)
|
||||||
|
let pool_status = if pool.health_status == Status::Critical || pool.usage_status == Status::Critical {
|
||||||
|
Status::Critical
|
||||||
|
} else if pool.health_status == Status::Warning || pool.usage_status == Status::Warning {
|
||||||
|
Status::Warning
|
||||||
|
} else if pool.health_status == Status::Ok && pool.usage_status == Status::Ok {
|
||||||
|
Status::Ok
|
||||||
|
} else {
|
||||||
|
Status::Unknown
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut storage_pool = StoragePool {
|
||||||
|
name: pool.name.clone(),
|
||||||
|
mount_point: pool.mount.clone(),
|
||||||
|
pool_type: pool.pool_type.clone(),
|
||||||
|
drives: Vec::new(),
|
||||||
|
data_drives: Vec::new(),
|
||||||
|
parity_drives: Vec::new(),
|
||||||
|
filesystems: Vec::new(),
|
||||||
|
usage_percent: Some(pool.usage_percent),
|
||||||
|
used_gb: Some(pool.used_gb),
|
||||||
|
total_gb: Some(pool.total_gb),
|
||||||
|
status: pool_status,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add data drives - use agent-calculated status
|
||||||
|
for drive in &pool.data_drives {
|
||||||
|
// Use combined health and temperature status
|
||||||
|
let drive_status = if drive.health_status == Status::Critical || drive.temperature_status == Status::Critical {
|
||||||
|
Status::Critical
|
||||||
|
} else if drive.health_status == Status::Warning || drive.temperature_status == Status::Warning {
|
||||||
|
Status::Warning
|
||||||
|
} else if drive.health_status == Status::Ok && drive.temperature_status == Status::Ok {
|
||||||
|
Status::Ok
|
||||||
|
} else {
|
||||||
|
Status::Unknown
|
||||||
|
};
|
||||||
|
|
||||||
|
let display_name = drive.serial_number.as_ref()
|
||||||
|
.map(|s| truncate_serial(s))
|
||||||
|
.unwrap_or(drive.name.clone());
|
||||||
|
let storage_drive = StorageDrive {
|
||||||
|
name: display_name,
|
||||||
|
temperature: drive.temperature_celsius,
|
||||||
|
wear_percent: drive.wear_percent,
|
||||||
|
status: drive_status,
|
||||||
|
};
|
||||||
|
storage_pool.data_drives.push(storage_drive);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add parity drives - use agent-calculated status
|
||||||
|
for drive in &pool.parity_drives {
|
||||||
|
// Use combined health and temperature status
|
||||||
|
let drive_status = if drive.health_status == Status::Critical || drive.temperature_status == Status::Critical {
|
||||||
|
Status::Critical
|
||||||
|
} else if drive.health_status == Status::Warning || drive.temperature_status == Status::Warning {
|
||||||
|
Status::Warning
|
||||||
|
} else if drive.health_status == Status::Ok && drive.temperature_status == Status::Ok {
|
||||||
|
Status::Ok
|
||||||
|
} else {
|
||||||
|
Status::Unknown
|
||||||
|
};
|
||||||
|
|
||||||
|
let display_name = drive.serial_number.as_ref()
|
||||||
|
.map(|s| truncate_serial(s))
|
||||||
|
.unwrap_or(drive.name.clone());
|
||||||
|
let storage_drive = StorageDrive {
|
||||||
|
name: display_name,
|
||||||
|
temperature: drive.temperature_celsius,
|
||||||
|
wear_percent: drive.wear_percent,
|
||||||
|
status: drive_status,
|
||||||
|
};
|
||||||
|
storage_pool.parity_drives.push(storage_drive);
|
||||||
|
}
|
||||||
|
|
||||||
|
pools.insert(pool.name.clone(), storage_pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store pools
|
||||||
let mut pool_list: Vec<StoragePool> = pools.into_values().collect();
|
let mut pool_list: Vec<StoragePool> = pools.into_values().collect();
|
||||||
pool_list.sort_by(|a, b| a.name.cmp(&b.name)); // Sort alphabetically by name
|
pool_list.sort_by(|a, b| a.name.cmp(&b.name));
|
||||||
self.storage_pools = pool_list;
|
self.storage_pools = pool_list;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract pool name from disk metric name
|
/// Render storage section with enhanced tree structure
|
||||||
fn extract_pool_name(&self, metric_name: &str) -> Option<String> {
|
|
||||||
if let Some(captures) = metric_name.strip_prefix("disk_") {
|
|
||||||
if let Some(pos) = captures.find('_') {
|
|
||||||
return Some(captures[..pos].to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extract drive name from disk metric name
|
|
||||||
fn extract_drive_name(&self, metric_name: &str) -> Option<String> {
|
|
||||||
// Pattern: disk_pool_drive_metric
|
|
||||||
let parts: Vec<&str> = metric_name.split('_').collect();
|
|
||||||
if parts.len() >= 3 && parts[0] == "disk" {
|
|
||||||
return Some(parts[2].to_string());
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Render storage section with tree structure
|
|
||||||
fn render_storage(&self) -> Vec<Line<'_>> {
|
fn render_storage(&self) -> Vec<Line<'_>> {
|
||||||
let mut lines = Vec::new();
|
let mut lines = Vec::new();
|
||||||
|
|
||||||
for pool in &self.storage_pools {
|
for pool in &self.storage_pools {
|
||||||
// Pool header line
|
// Pool header line with type and health
|
||||||
let usage_text = match (pool.usage_percent, pool.used_gb, pool.total_gb) {
|
let pool_label = if pool.pool_type == "drive" {
|
||||||
(Some(pct), Some(used), Some(total)) => {
|
// For physical drives, show the drive name with temperature and wear percentage if available
|
||||||
format!("{:.0}% {:.1}GB/{:.1}GB", pct, used, total)
|
// Physical drives only have one drive entry
|
||||||
|
if let Some(drive) = pool.drives.first() {
|
||||||
|
let mut drive_details = Vec::new();
|
||||||
|
if let Some(temp) = drive.temperature {
|
||||||
|
drive_details.push(format!("T: {}°C", temp as i32));
|
||||||
|
}
|
||||||
|
if let Some(wear) = drive.wear_percent {
|
||||||
|
drive_details.push(format!("W: {}%", wear as i32));
|
||||||
|
}
|
||||||
|
|
||||||
|
if !drive_details.is_empty() {
|
||||||
|
format!("{} {}", drive.name, drive_details.join(" "))
|
||||||
|
} else {
|
||||||
|
drive.name.clone()
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pool.name.clone()
|
||||||
}
|
}
|
||||||
_ => "—% —GB/—GB".to_string(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let pool_label = if pool.pool_type.to_lowercase() == "single" {
|
|
||||||
format!("{}:", pool.mount_point)
|
|
||||||
} else {
|
} else {
|
||||||
format!("{} ({}):", pool.mount_point, pool.pool_type)
|
// For mergerfs pools, show pool type with mount point
|
||||||
|
format!("mergerfs {}:", pool.mount_point)
|
||||||
};
|
};
|
||||||
let pool_spans = StatusIcons::create_status_spans(
|
|
||||||
pool.status.clone(),
|
let pool_spans = StatusIcons::create_status_spans(pool.status.clone(), &pool_label);
|
||||||
&pool_label
|
|
||||||
);
|
|
||||||
lines.push(Line::from(pool_spans));
|
lines.push(Line::from(pool_spans));
|
||||||
|
|
||||||
// Drive lines with tree structure
|
// Show individual filesystems for physical drives (matching CLAUDE.md format)
|
||||||
let has_usage_line = pool.usage_percent.is_some();
|
if pool.pool_type == "drive" {
|
||||||
for (i, drive) in pool.drives.iter().enumerate() {
|
// Show filesystem entries like: ├─ ● /: 55% 250.5GB/456.4GB
|
||||||
let is_last_drive = i == pool.drives.len() - 1;
|
for (i, filesystem) in pool.filesystems.iter().enumerate() {
|
||||||
let tree_symbol = if is_last_drive && !has_usage_line { "└─" } else { "├─" };
|
let is_last = i == pool.filesystems.len() - 1;
|
||||||
|
let tree_symbol = if is_last { " └─ " } else { " ├─ " };
|
||||||
|
|
||||||
|
let fs_text = format!("{}: {:.0}% {:.1}GB/{:.1}GB",
|
||||||
|
filesystem.mount_point,
|
||||||
|
filesystem.usage_percent.unwrap_or(0.0),
|
||||||
|
filesystem.used_gb.unwrap_or(0.0),
|
||||||
|
filesystem.total_gb.unwrap_or(0.0));
|
||||||
|
|
||||||
|
let mut fs_spans = vec![
|
||||||
|
Span::styled(tree_symbol, Typography::tree()),
|
||||||
|
];
|
||||||
|
fs_spans.extend(StatusIcons::create_status_spans(
|
||||||
|
filesystem.status.clone(),
|
||||||
|
&fs_text
|
||||||
|
));
|
||||||
|
lines.push(Line::from(fs_spans));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// For mergerfs pools, show structure matching CLAUDE.md format:
|
||||||
|
// ● mergerfs (2+1):
|
||||||
|
// ├─ Total: ● 63% 2355.2GB/3686.4GB
|
||||||
|
// ├─ Data Disks:
|
||||||
|
// │ ├─ ● sdb T: 24°C W: 5%
|
||||||
|
// │ └─ ● sdd T: 27°C W: 5%
|
||||||
|
// ├─ Parity: ● sdc T: 24°C W: 5%
|
||||||
|
// └─ Mount: /srv/media
|
||||||
|
|
||||||
let mut drive_info = Vec::new();
|
// Pool total usage
|
||||||
if let Some(temp) = drive.temperature {
|
let total_text = format!("{:.0}% {:.1}GB/{:.1}GB",
|
||||||
drive_info.push(format!("T: {:.0}C", temp));
|
pool.usage_percent.unwrap_or(0.0),
|
||||||
}
|
pool.used_gb.unwrap_or(0.0),
|
||||||
if let Some(wear) = drive.wear_percent {
|
pool.total_gb.unwrap_or(0.0)
|
||||||
drive_info.push(format!("W: {:.0}%", wear));
|
);
|
||||||
}
|
let mut total_spans = vec![
|
||||||
let drive_text = if drive_info.is_empty() {
|
Span::styled(" ├─ ", Typography::tree()),
|
||||||
drive.name.clone()
|
|
||||||
} else {
|
|
||||||
format!("{} {}", drive.name, drive_info.join(" • "))
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut drive_spans = vec![
|
|
||||||
Span::raw(" "),
|
|
||||||
Span::styled(tree_symbol, Typography::tree()),
|
|
||||||
Span::raw(" "),
|
|
||||||
];
|
];
|
||||||
drive_spans.extend(StatusIcons::create_status_spans(drive.status.clone(), &drive_text));
|
total_spans.extend(StatusIcons::create_status_spans(Status::Ok, &total_text));
|
||||||
lines.push(Line::from(drive_spans));
|
lines.push(Line::from(total_spans));
|
||||||
}
|
|
||||||
|
|
||||||
// Usage line
|
// Data drives - at same level as parity
|
||||||
if pool.usage_percent.is_some() {
|
let has_parity = !pool.parity_drives.is_empty();
|
||||||
let tree_symbol = "└─";
|
for (i, drive) in pool.data_drives.iter().enumerate() {
|
||||||
let mut usage_spans = vec![
|
let is_last_data = i == pool.data_drives.len() - 1;
|
||||||
Span::raw(" "),
|
let mut drive_details = Vec::new();
|
||||||
Span::styled(tree_symbol, Typography::tree()),
|
if let Some(temp) = drive.temperature {
|
||||||
Span::raw(" "),
|
drive_details.push(format!("T: {}°C", temp as i32));
|
||||||
];
|
}
|
||||||
usage_spans.extend(StatusIcons::create_status_spans(pool.status.clone(), &usage_text));
|
if let Some(wear) = drive.wear_percent {
|
||||||
lines.push(Line::from(usage_spans));
|
drive_details.push(format!("W: {}%", wear as i32));
|
||||||
|
}
|
||||||
|
|
||||||
|
let drive_text = if !drive_details.is_empty() {
|
||||||
|
format!("Data_{}: {} {}", i + 1, drive.name, drive_details.join(" "))
|
||||||
|
} else {
|
||||||
|
format!("Data_{}: {}", i + 1, drive.name)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Last data drive uses └─ if there's no parity, otherwise ├─
|
||||||
|
let tree_symbol = if is_last_data && !has_parity { " └─ " } else { " ├─ " };
|
||||||
|
let mut data_spans = vec![
|
||||||
|
Span::styled(tree_symbol, Typography::tree()),
|
||||||
|
];
|
||||||
|
data_spans.extend(StatusIcons::create_status_spans(drive.status.clone(), &drive_text));
|
||||||
|
lines.push(Line::from(data_spans));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parity drives - last item(s)
|
||||||
|
if !pool.parity_drives.is_empty() {
|
||||||
|
for (i, drive) in pool.parity_drives.iter().enumerate() {
|
||||||
|
let is_last = i == pool.parity_drives.len() - 1;
|
||||||
|
let mut drive_details = Vec::new();
|
||||||
|
if let Some(temp) = drive.temperature {
|
||||||
|
drive_details.push(format!("T: {}°C", temp as i32));
|
||||||
|
}
|
||||||
|
if let Some(wear) = drive.wear_percent {
|
||||||
|
drive_details.push(format!("W: {}%", wear as i32));
|
||||||
|
}
|
||||||
|
|
||||||
|
let drive_text = if !drive_details.is_empty() {
|
||||||
|
format!("Parity: {} {}", drive.name, drive_details.join(" "))
|
||||||
|
} else {
|
||||||
|
format!("Parity: {}", drive.name)
|
||||||
|
};
|
||||||
|
|
||||||
|
let tree_symbol = if is_last { " └─ " } else { " ├─ " };
|
||||||
|
let mut parity_spans = vec![
|
||||||
|
Span::styled(tree_symbol, Typography::tree()),
|
||||||
|
];
|
||||||
|
parity_spans.extend(StatusIcons::create_status_spans(drive.status.clone(), &drive_text));
|
||||||
|
lines.push(Line::from(parity_spans));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -317,149 +518,385 @@ impl SystemWidget {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Widget for SystemWidget {
|
/// Truncate serial number to last 8 characters
|
||||||
fn update_from_metrics(&mut self, metrics: &[&Metric]) {
|
fn truncate_serial(serial: &str) -> String {
|
||||||
self.has_data = !metrics.is_empty();
|
let len = serial.len();
|
||||||
|
if len > 8 {
|
||||||
for metric in metrics {
|
serial[len - 8..].to_string()
|
||||||
match metric.name.as_str() {
|
} else {
|
||||||
// NixOS metrics
|
serial.to_string()
|
||||||
"system_nixos_build" => {
|
|
||||||
if let MetricValue::String(build) = &metric.value {
|
|
||||||
self.nixos_build = Some(build.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"system_config_hash" => {
|
|
||||||
if let MetricValue::String(hash) = &metric.value {
|
|
||||||
self.config_hash = Some(hash.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"system_active_users" => {
|
|
||||||
if let MetricValue::String(users) = &metric.value {
|
|
||||||
self.active_users = Some(users.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"system_agent_hash" => {
|
|
||||||
if let MetricValue::String(hash) = &metric.value {
|
|
||||||
self.agent_hash = Some(hash.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// CPU metrics
|
|
||||||
"cpu_load_1min" => {
|
|
||||||
if let MetricValue::Float(load) = metric.value {
|
|
||||||
self.cpu_load_1min = Some(load);
|
|
||||||
self.cpu_status = metric.status.clone();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"cpu_load_5min" => {
|
|
||||||
if let MetricValue::Float(load) = metric.value {
|
|
||||||
self.cpu_load_5min = Some(load);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"cpu_load_15min" => {
|
|
||||||
if let MetricValue::Float(load) = metric.value {
|
|
||||||
self.cpu_load_15min = Some(load);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"cpu_frequency_mhz" => {
|
|
||||||
if let MetricValue::Float(freq) = metric.value {
|
|
||||||
self.cpu_frequency = Some(freq);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Memory metrics
|
|
||||||
"memory_usage_percent" => {
|
|
||||||
if let MetricValue::Float(usage) = metric.value {
|
|
||||||
self.memory_usage_percent = Some(usage);
|
|
||||||
self.memory_status = metric.status.clone();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"memory_used_gb" => {
|
|
||||||
if let MetricValue::Float(used) = metric.value {
|
|
||||||
self.memory_used_gb = Some(used);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"memory_total_gb" => {
|
|
||||||
if let MetricValue::Float(total) = metric.value {
|
|
||||||
self.memory_total_gb = Some(total);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Tmpfs metrics
|
|
||||||
"memory_tmp_usage_percent" => {
|
|
||||||
if let MetricValue::Float(usage) = metric.value {
|
|
||||||
self.tmp_usage_percent = Some(usage);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"memory_tmp_used_gb" => {
|
|
||||||
if let MetricValue::Float(used) = metric.value {
|
|
||||||
self.tmp_used_gb = Some(used);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"memory_tmp_total_gb" => {
|
|
||||||
if let MetricValue::Float(total) = metric.value {
|
|
||||||
self.tmp_total_gb = Some(total);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update storage from all disk metrics
|
|
||||||
self.update_storage_from_metrics(metrics);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SystemWidget {
|
impl SystemWidget {
|
||||||
/// Render with scroll offset support
|
/// Render backup section for display
|
||||||
pub fn render_with_scroll(&mut self, frame: &mut Frame, area: Rect, scroll_offset: usize) {
|
fn render_backup(&self) -> Vec<Line<'_>> {
|
||||||
let mut lines = Vec::new();
|
let mut lines = Vec::new();
|
||||||
|
|
||||||
// NixOS section
|
if self.backup_repositories.is_empty() {
|
||||||
lines.push(Line::from(vec![
|
return lines;
|
||||||
Span::styled("NixOS:", Typography::widget_title())
|
}
|
||||||
]));
|
|
||||||
|
// Format backup time (use complete timestamp)
|
||||||
let config_text = self.config_hash.as_deref().unwrap_or("unknown");
|
let time_display = if let Some(ref time_str) = self.backup_last_time {
|
||||||
lines.push(Line::from(vec![
|
time_str.clone()
|
||||||
Span::styled(format!("Build: {}", config_text), Typography::secondary())
|
|
||||||
]));
|
|
||||||
|
|
||||||
let agent_hash_text = self.agent_hash.as_deref().unwrap_or("unknown");
|
|
||||||
let short_hash = if agent_hash_text.len() > 8 && agent_hash_text != "unknown" {
|
|
||||||
&agent_hash_text[..8]
|
|
||||||
} else {
|
} else {
|
||||||
agent_hash_text
|
"unknown".to_string()
|
||||||
};
|
};
|
||||||
lines.push(Line::from(vec![
|
|
||||||
Span::styled(format!("Agent: {}", short_hash), Typography::secondary())
|
// Header: just the timestamp
|
||||||
]));
|
let repo_spans = StatusIcons::create_status_spans(self.backup_status, &time_display);
|
||||||
|
lines.push(Line::from(repo_spans));
|
||||||
|
|
||||||
|
// List all repositories with archive count and size
|
||||||
|
let repo_count = self.backup_repositories.len();
|
||||||
|
for (idx, repo) in self.backup_repositories.iter().enumerate() {
|
||||||
|
let tree_char = if idx == repo_count - 1 { "└─" } else { "├─" };
|
||||||
|
|
||||||
|
// Format size: use kB for < 1MB, MB for < 1GB, otherwise GB
|
||||||
|
let size_display = if repo.repo_size_gb < 0.001 {
|
||||||
|
format!("{:.0}kB", repo.repo_size_gb * 1024.0 * 1024.0)
|
||||||
|
} else if repo.repo_size_gb < 1.0 {
|
||||||
|
format!("{:.0}MB", repo.repo_size_gb * 1024.0)
|
||||||
|
} else {
|
||||||
|
format!("{:.1}GB", repo.repo_size_gb)
|
||||||
|
};
|
||||||
|
|
||||||
|
let repo_text = format!("{} ({}) {}", repo.name, repo.archive_count, size_display);
|
||||||
|
|
||||||
|
let mut repo_spans = vec![
|
||||||
|
Span::styled(format!(" {} ", tree_char), Typography::tree()),
|
||||||
|
];
|
||||||
|
repo_spans.extend(StatusIcons::create_status_spans(repo.status, &repo_text));
|
||||||
|
lines.push(Line::from(repo_spans));
|
||||||
|
}
|
||||||
|
|
||||||
|
lines
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compress IPv4 addresses from same subnet
|
||||||
|
/// Example: "192.168.30.1, 192.168.30.100" -> "192.168.30.1, 100"
|
||||||
|
fn compress_ipv4_addresses(addresses: &[String]) -> String {
|
||||||
|
if addresses.is_empty() {
|
||||||
|
return String::new();
|
||||||
|
}
|
||||||
|
|
||||||
|
if addresses.len() == 1 {
|
||||||
|
return addresses[0].clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut result = Vec::new();
|
||||||
|
let mut last_prefix = String::new();
|
||||||
|
|
||||||
|
for addr in addresses {
|
||||||
|
let parts: Vec<&str> = addr.split('.').collect();
|
||||||
|
if parts.len() == 4 {
|
||||||
|
let prefix = format!("{}.{}.{}", parts[0], parts[1], parts[2]);
|
||||||
|
|
||||||
|
if prefix == last_prefix {
|
||||||
|
// Same subnet, show only last octet
|
||||||
|
result.push(parts[3].to_string());
|
||||||
|
} else {
|
||||||
|
// Different subnet, show full IP
|
||||||
|
result.push(addr.clone());
|
||||||
|
last_prefix = prefix;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Invalid IP format, show as-is
|
||||||
|
result.push(addr.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result.join(", ")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Render network section for display with physical/virtual grouping
|
||||||
|
fn render_network(&self) -> Vec<Line<'_>> {
|
||||||
|
let mut lines = Vec::new();
|
||||||
|
|
||||||
|
if self.network_interfaces.is_empty() {
|
||||||
|
return lines;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Separate physical and virtual interfaces
|
||||||
|
let physical: Vec<_> = self.network_interfaces.iter().filter(|i| i.is_physical).collect();
|
||||||
|
let virtual_interfaces: Vec<_> = self.network_interfaces.iter().filter(|i| !i.is_physical).collect();
|
||||||
|
|
||||||
|
// Find standalone virtual interfaces (those without a parent)
|
||||||
|
let mut standalone_virtual: Vec<_> = virtual_interfaces.iter()
|
||||||
|
.filter(|i| i.parent_interface.is_none())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Sort standalone virtual: VLANs first (by VLAN ID), then others alphabetically
|
||||||
|
standalone_virtual.sort_by(|a, b| {
|
||||||
|
match (a.vlan_id, b.vlan_id) {
|
||||||
|
(Some(vlan_a), Some(vlan_b)) => vlan_a.cmp(&vlan_b),
|
||||||
|
(Some(_), None) => std::cmp::Ordering::Less,
|
||||||
|
(None, Some(_)) => std::cmp::Ordering::Greater,
|
||||||
|
(None, None) => a.name.cmp(&b.name),
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Render physical interfaces with their children
|
||||||
|
for (phy_idx, interface) in physical.iter().enumerate() {
|
||||||
|
let is_last_physical = phy_idx == physical.len() - 1 && standalone_virtual.is_empty();
|
||||||
|
|
||||||
|
// Physical interface header with status icon
|
||||||
|
let mut header_spans = vec![];
|
||||||
|
header_spans.extend(StatusIcons::create_status_spans(
|
||||||
|
interface.link_status.clone(),
|
||||||
|
&format!("{}:", interface.name)
|
||||||
|
));
|
||||||
|
lines.push(Line::from(header_spans));
|
||||||
|
|
||||||
|
// Find child interfaces for this physical interface
|
||||||
|
let mut children: Vec<_> = virtual_interfaces.iter()
|
||||||
|
.filter(|vi| {
|
||||||
|
if let Some(parent) = &vi.parent_interface {
|
||||||
|
parent == &interface.name
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Sort children: VLANs first (by VLAN ID), then others alphabetically
|
||||||
|
children.sort_by(|a, b| {
|
||||||
|
match (a.vlan_id, b.vlan_id) {
|
||||||
|
(Some(vlan_a), Some(vlan_b)) => vlan_a.cmp(&vlan_b),
|
||||||
|
(Some(_), None) => std::cmp::Ordering::Less,
|
||||||
|
(None, Some(_)) => std::cmp::Ordering::Greater,
|
||||||
|
(None, None) => a.name.cmp(&b.name),
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Count total items under this physical interface (IPs + children)
|
||||||
|
let ip_count = interface.ipv4_addresses.len() + interface.ipv6_addresses.len();
|
||||||
|
let total_children = ip_count + children.len();
|
||||||
|
let mut child_index = 0;
|
||||||
|
|
||||||
|
// IPv4 addresses on the physical interface itself
|
||||||
|
for ipv4 in &interface.ipv4_addresses {
|
||||||
|
child_index += 1;
|
||||||
|
let is_last = child_index == total_children && is_last_physical;
|
||||||
|
let tree_symbol = if is_last { " └─ " } else { " ├─ " };
|
||||||
|
lines.push(Line::from(vec![
|
||||||
|
Span::styled(tree_symbol, Typography::tree()),
|
||||||
|
Span::styled(format!("ip: {}", ipv4), Typography::secondary()),
|
||||||
|
]));
|
||||||
|
}
|
||||||
|
|
||||||
|
// IPv6 addresses on the physical interface itself
|
||||||
|
for ipv6 in &interface.ipv6_addresses {
|
||||||
|
child_index += 1;
|
||||||
|
let is_last = child_index == total_children && is_last_physical;
|
||||||
|
let tree_symbol = if is_last { " └─ " } else { " ├─ " };
|
||||||
|
lines.push(Line::from(vec![
|
||||||
|
Span::styled(tree_symbol, Typography::tree()),
|
||||||
|
Span::styled(format!("ip: {}", ipv6), Typography::secondary()),
|
||||||
|
]));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Child virtual interfaces (VLANs, etc.)
|
||||||
|
for child in children {
|
||||||
|
child_index += 1;
|
||||||
|
let is_last = child_index == total_children && is_last_physical;
|
||||||
|
let tree_symbol = if is_last { " └─ " } else { " ├─ " };
|
||||||
|
|
||||||
|
let ip_text = if !child.ipv4_addresses.is_empty() {
|
||||||
|
Self::compress_ipv4_addresses(&child.ipv4_addresses)
|
||||||
|
} else if !child.ipv6_addresses.is_empty() {
|
||||||
|
child.ipv6_addresses.join(", ")
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Format: "name (vlan X): IP" or "name: IP"
|
||||||
|
let child_text = if let Some(vlan_id) = child.vlan_id {
|
||||||
|
if !ip_text.is_empty() {
|
||||||
|
format!("{} (vlan {}): {}", child.name, vlan_id, ip_text)
|
||||||
|
} else {
|
||||||
|
format!("{} (vlan {}):", child.name, vlan_id)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if !ip_text.is_empty() {
|
||||||
|
format!("{}: {}", child.name, ip_text)
|
||||||
|
} else {
|
||||||
|
format!("{}:", child.name)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
lines.push(Line::from(vec![
|
||||||
|
Span::styled(tree_symbol, Typography::tree()),
|
||||||
|
Span::styled(child_text, Typography::secondary()),
|
||||||
|
]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Render standalone virtual interfaces (those without a parent)
|
||||||
|
for (virt_idx, interface) in standalone_virtual.iter().enumerate() {
|
||||||
|
let is_last = virt_idx == standalone_virtual.len() - 1;
|
||||||
|
let tree_symbol = if is_last { " └─ " } else { " ├─ " };
|
||||||
|
|
||||||
|
// Virtual interface with IPs
|
||||||
|
let ip_text = if !interface.ipv4_addresses.is_empty() {
|
||||||
|
Self::compress_ipv4_addresses(&interface.ipv4_addresses)
|
||||||
|
} else if !interface.ipv6_addresses.is_empty() {
|
||||||
|
interface.ipv6_addresses.join(", ")
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Format: "name (vlan X): IP" or "name: IP"
|
||||||
|
let interface_text = if let Some(vlan_id) = interface.vlan_id {
|
||||||
|
if !ip_text.is_empty() {
|
||||||
|
format!("{} (vlan {}): {}", interface.name, vlan_id, ip_text)
|
||||||
|
} else {
|
||||||
|
format!("{} (vlan {}):", interface.name, vlan_id)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if !ip_text.is_empty() {
|
||||||
|
format!("{}: {}", interface.name, ip_text)
|
||||||
|
} else {
|
||||||
|
format!("{}:", interface.name)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
lines.push(Line::from(vec![
|
||||||
|
Span::styled(tree_symbol, Typography::tree()),
|
||||||
|
Span::styled(interface_text, Typography::secondary()),
|
||||||
|
]));
|
||||||
|
}
|
||||||
|
|
||||||
|
lines
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Render system widget
|
||||||
|
/// Scroll down by one line
|
||||||
|
pub fn scroll_down(&mut self, _visible_height: usize, _total_lines: usize) {
|
||||||
|
let total_lines = self.get_total_lines();
|
||||||
|
|
||||||
|
// Use last_viewport_height if available (more accurate), otherwise can't scroll
|
||||||
|
let viewport_height = if self.last_viewport_height > 0 {
|
||||||
|
self.last_viewport_height
|
||||||
|
} else {
|
||||||
|
return; // Can't scroll without knowing viewport size
|
||||||
|
};
|
||||||
|
|
||||||
|
// Max scroll should allow us to see all remaining content
|
||||||
|
// When scroll_offset + viewport_height >= total_lines, we can see everything
|
||||||
|
let max_scroll = if total_lines > viewport_height {
|
||||||
|
total_lines - viewport_height
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
|
if self.scroll_offset < max_scroll {
|
||||||
|
self.scroll_offset += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Scroll up by one line
|
||||||
|
pub fn scroll_up(&mut self) {
|
||||||
|
if self.scroll_offset > 0 {
|
||||||
|
self.scroll_offset -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get total line count (needs to be calculated before rendering)
|
||||||
|
pub fn get_total_lines(&self) -> usize {
|
||||||
|
let mut count = 0;
|
||||||
|
|
||||||
|
// CPU section (2+ lines for load/cstate, +1 if has model/cores)
|
||||||
|
count += 2;
|
||||||
|
if self.cpu_model_name.is_some() || self.cpu_core_count.is_some() {
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// RAM section (1 + tmpfs mounts)
|
||||||
|
count += 2;
|
||||||
|
count += self.tmpfs_mounts.len();
|
||||||
|
|
||||||
|
// Network section
|
||||||
|
if !self.network_interfaces.is_empty() {
|
||||||
|
count += 1; // Header
|
||||||
|
// Count network lines (would need to mirror render_network logic)
|
||||||
|
for iface in &self.network_interfaces {
|
||||||
|
count += 1; // Interface name
|
||||||
|
count += iface.ipv4_addresses.len();
|
||||||
|
count += iface.ipv6_addresses.len();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Storage section
|
||||||
|
count += 1; // Header
|
||||||
|
for pool in &self.storage_pools {
|
||||||
|
count += 1; // Pool header
|
||||||
|
count += pool.drives.len();
|
||||||
|
count += pool.data_drives.len();
|
||||||
|
count += pool.parity_drives.len();
|
||||||
|
count += pool.filesystems.len();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Backup section
|
||||||
|
if !self.backup_repositories.is_empty() {
|
||||||
|
count += 1; // Header: "Backup:"
|
||||||
|
count += 1; // Repo count and timestamp header
|
||||||
|
count += self.backup_repositories.len(); // Individual repos
|
||||||
|
}
|
||||||
|
|
||||||
|
count
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn render(&mut self, frame: &mut Frame, area: Rect, _hostname: &str, _config: Option<&crate::config::DashboardConfig>) {
|
||||||
|
// Store viewport height for accurate scroll calculations
|
||||||
|
self.last_viewport_height = area.height as usize;
|
||||||
|
|
||||||
|
let mut lines = Vec::new();
|
||||||
|
|
||||||
// CPU section
|
// CPU section
|
||||||
lines.push(Line::from(vec![
|
lines.push(Line::from(vec![
|
||||||
Span::styled("CPU:", Typography::widget_title())
|
Span::styled("CPU:", Typography::widget_title())
|
||||||
]));
|
]));
|
||||||
|
|
||||||
let load_text = self.format_cpu_load();
|
let load_text = self.format_cpu_load();
|
||||||
let cpu_spans = StatusIcons::create_status_spans(
|
let cpu_spans = StatusIcons::create_status_spans(
|
||||||
self.cpu_status.clone(),
|
self.cpu_status.clone(),
|
||||||
&format!("Load: {}", load_text)
|
&format!("Load: {}", load_text)
|
||||||
);
|
);
|
||||||
lines.push(Line::from(cpu_spans));
|
lines.push(Line::from(cpu_spans));
|
||||||
|
|
||||||
let freq_text = self.format_cpu_frequency();
|
let cstate_text = self.format_cpu_cstate();
|
||||||
|
let has_cpu_info = self.cpu_model_name.is_some() || self.cpu_core_count.is_some();
|
||||||
|
let cstate_tree = if has_cpu_info { " ├─ " } else { " └─ " };
|
||||||
lines.push(Line::from(vec![
|
lines.push(Line::from(vec![
|
||||||
Span::styled(" └─ ", Typography::tree()),
|
Span::styled(cstate_tree, Typography::tree()),
|
||||||
Span::styled(format!("Freq: {}", freq_text), Typography::secondary())
|
Span::styled(format!("C-state: {}", cstate_text), Typography::secondary())
|
||||||
]));
|
]));
|
||||||
|
|
||||||
|
// CPU model and core count (if available)
|
||||||
|
if let (Some(model), Some(cores)) = (&self.cpu_model_name, self.cpu_core_count) {
|
||||||
|
lines.push(Line::from(vec![
|
||||||
|
Span::styled(" └─ ", Typography::tree()),
|
||||||
|
Span::styled(format!("{} ({} cores)", model, cores), Typography::secondary())
|
||||||
|
]));
|
||||||
|
} else if let Some(model) = &self.cpu_model_name {
|
||||||
|
lines.push(Line::from(vec![
|
||||||
|
Span::styled(" └─ ", Typography::tree()),
|
||||||
|
Span::styled(model.clone(), Typography::secondary())
|
||||||
|
]));
|
||||||
|
} else if let Some(cores) = self.cpu_core_count {
|
||||||
|
lines.push(Line::from(vec![
|
||||||
|
Span::styled(" └─ ", Typography::tree()),
|
||||||
|
Span::styled(format!("{} cores", cores), Typography::secondary())
|
||||||
|
]));
|
||||||
|
}
|
||||||
|
|
||||||
// RAM section
|
// RAM section
|
||||||
lines.push(Line::from(vec![
|
lines.push(Line::from(vec![
|
||||||
Span::styled("RAM:", Typography::widget_title())
|
Span::styled("RAM:", Typography::widget_title())
|
||||||
]));
|
]));
|
||||||
|
|
||||||
let memory_text = self.format_memory_usage();
|
let memory_text = self.format_memory_usage();
|
||||||
let memory_spans = StatusIcons::create_status_spans(
|
let memory_spans = StatusIcons::create_status_spans(
|
||||||
self.memory_status.clone(),
|
self.memory_status.clone(),
|
||||||
@ -467,90 +904,107 @@ impl SystemWidget {
|
|||||||
);
|
);
|
||||||
lines.push(Line::from(memory_spans));
|
lines.push(Line::from(memory_spans));
|
||||||
|
|
||||||
let tmp_text = self.format_tmp_usage();
|
// Display all tmpfs mounts
|
||||||
let mut tmp_spans = vec![
|
for (i, tmpfs) in self.tmpfs_mounts.iter().enumerate() {
|
||||||
Span::styled(" └─ ", Typography::tree()),
|
let is_last = i == self.tmpfs_mounts.len() - 1;
|
||||||
];
|
let tree_symbol = if is_last { " └─ " } else { " ├─ " };
|
||||||
tmp_spans.extend(StatusIcons::create_status_spans(
|
|
||||||
self.memory_status.clone(),
|
let usage_text = if tmpfs.total_gb > 0.0 {
|
||||||
&format!("/tmp: {}", tmp_text)
|
format!("{:.0}% {:.1}GB/{:.1}GB",
|
||||||
));
|
tmpfs.usage_percent,
|
||||||
lines.push(Line::from(tmp_spans));
|
tmpfs.used_gb,
|
||||||
|
tmpfs.total_gb)
|
||||||
|
} else {
|
||||||
|
"— —/—".to_string()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut tmpfs_spans = vec![
|
||||||
|
Span::styled(tree_symbol, Typography::tree()),
|
||||||
|
];
|
||||||
|
tmpfs_spans.extend(StatusIcons::create_status_spans(
|
||||||
|
Status::Ok, // TODO: Calculate status based on usage_percent
|
||||||
|
&format!("{}: {}", tmpfs.mount, usage_text)
|
||||||
|
));
|
||||||
|
lines.push(Line::from(tmpfs_spans));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Network section
|
||||||
|
if !self.network_interfaces.is_empty() {
|
||||||
|
lines.push(Line::from(vec![
|
||||||
|
Span::styled("Network:", Typography::widget_title())
|
||||||
|
]));
|
||||||
|
|
||||||
|
let network_lines = self.render_network();
|
||||||
|
lines.extend(network_lines);
|
||||||
|
}
|
||||||
|
|
||||||
// Storage section
|
// Storage section
|
||||||
lines.push(Line::from(vec![
|
lines.push(Line::from(vec![
|
||||||
Span::styled("Storage:", Typography::widget_title())
|
Span::styled("Storage:", Typography::widget_title())
|
||||||
]));
|
]));
|
||||||
|
|
||||||
// Storage items with overflow handling
|
// Storage items - let main overflow logic handle truncation
|
||||||
let storage_lines = self.render_storage();
|
let storage_lines = self.render_storage();
|
||||||
let remaining_space = area.height.saturating_sub(lines.len() as u16);
|
lines.extend(storage_lines);
|
||||||
|
|
||||||
if storage_lines.len() <= remaining_space as usize {
|
// Backup section (if available)
|
||||||
// All storage lines fit
|
if !self.backup_repositories.is_empty() {
|
||||||
lines.extend(storage_lines);
|
lines.push(Line::from(vec![
|
||||||
} else if remaining_space >= 2 {
|
Span::styled("Backup:", Typography::widget_title())
|
||||||
// Show what we can and add overflow indicator
|
]));
|
||||||
let lines_to_show = (remaining_space - 1) as usize; // Reserve 1 line for overflow
|
|
||||||
lines.extend(storage_lines.iter().take(lines_to_show).cloned());
|
let backup_lines = self.render_backup();
|
||||||
|
lines.extend(backup_lines);
|
||||||
// Count hidden pools
|
|
||||||
let mut hidden_pools = 0;
|
|
||||||
let mut current_pool = String::new();
|
|
||||||
for (i, line) in storage_lines.iter().enumerate() {
|
|
||||||
if i >= lines_to_show {
|
|
||||||
// Check if this line represents a new pool (no indentation)
|
|
||||||
if let Some(first_span) = line.spans.first() {
|
|
||||||
let text = first_span.content.as_ref();
|
|
||||||
if !text.starts_with(" ") && text.contains(':') {
|
|
||||||
let pool_name = text.split(':').next().unwrap_or("").trim();
|
|
||||||
if pool_name != current_pool {
|
|
||||||
hidden_pools += 1;
|
|
||||||
current_pool = pool_name.to_string();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if hidden_pools > 0 {
|
|
||||||
let overflow_text = format!(
|
|
||||||
"... and {} more pool{}",
|
|
||||||
hidden_pools,
|
|
||||||
if hidden_pools == 1 { "" } else { "s" }
|
|
||||||
);
|
|
||||||
lines.push(Line::from(vec![
|
|
||||||
Span::styled(overflow_text, Typography::muted())
|
|
||||||
]));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply scroll offset
|
// Apply scroll offset
|
||||||
let total_lines = lines.len();
|
let total_lines = lines.len();
|
||||||
let available_height = area.height as usize;
|
let available_height = area.height as usize;
|
||||||
|
|
||||||
// Always apply scrolling if scroll_offset > 0, even if content fits
|
// Clamp scroll_offset to valid range based on current viewport and content
|
||||||
if scroll_offset > 0 || total_lines > available_height {
|
// This handles dynamic viewport size changes
|
||||||
let max_scroll = if total_lines > available_height {
|
let max_valid_scroll = total_lines.saturating_sub(available_height);
|
||||||
total_lines - available_height
|
let clamped_scroll = self.scroll_offset.min(max_valid_scroll);
|
||||||
} else {
|
|
||||||
total_lines.saturating_sub(1)
|
// Calculate how many lines remain after scroll offset
|
||||||
};
|
let remaining_lines = total_lines.saturating_sub(clamped_scroll);
|
||||||
let effective_scroll = scroll_offset.min(max_scroll);
|
|
||||||
|
// Check if all remaining content fits in viewport
|
||||||
// Take only the visible portion after scrolling
|
let will_show_more_below = remaining_lines > available_height;
|
||||||
let visible_lines: Vec<Line> = lines
|
|
||||||
.into_iter()
|
// Reserve one line for "X more below" only if we can't fit everything
|
||||||
.skip(effective_scroll)
|
let lines_for_content = if will_show_more_below {
|
||||||
.take(available_height)
|
available_height.saturating_sub(1)
|
||||||
.collect();
|
|
||||||
|
|
||||||
let paragraph = Paragraph::new(Text::from(visible_lines));
|
|
||||||
frame.render_widget(paragraph, area);
|
|
||||||
} else {
|
} else {
|
||||||
// All content fits and no scroll offset, render normally
|
available_height.min(remaining_lines)
|
||||||
let paragraph = Paragraph::new(Text::from(lines));
|
};
|
||||||
frame.render_widget(paragraph, area);
|
|
||||||
|
// Apply clamped scroll offset and take only what fits
|
||||||
|
let mut visible_lines: Vec<Line> = lines
|
||||||
|
.into_iter()
|
||||||
|
.skip(clamped_scroll)
|
||||||
|
.take(lines_for_content)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Note: we don't update self.scroll_offset here due to borrow checker constraints
|
||||||
|
// It will be clamped on next render if still out of bounds
|
||||||
|
|
||||||
|
// Only calculate hidden_below if we actually reserved space for the message
|
||||||
|
let hidden_below = if will_show_more_below {
|
||||||
|
remaining_lines.saturating_sub(lines_for_content)
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add "more below" message if needed
|
||||||
|
if hidden_below > 0 {
|
||||||
|
let more_line = Line::from(vec![
|
||||||
|
Span::styled(format!("... {} more below", hidden_below), Style::default().fg(Theme::border()))
|
||||||
|
]);
|
||||||
|
visible_lines.push(more_line);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let paragraph = Paragraph::new(Text::from(visible_lines));
|
||||||
|
frame.render_widget(paragraph, area);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard-shared"
|
name = "cm-dashboard-shared"
|
||||||
version = "0.1.0"
|
version = "0.1.275"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
245
shared/src/agent_data.rs
Normal file
245
shared/src/agent_data.rs
Normal file
@ -0,0 +1,245 @@
|
|||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use crate::Status;
|
||||||
|
|
||||||
|
/// Complete structured data from an agent
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct AgentData {
|
||||||
|
pub hostname: String,
|
||||||
|
pub agent_version: String,
|
||||||
|
pub build_version: Option<String>,
|
||||||
|
pub timestamp: u64,
|
||||||
|
pub system: SystemData,
|
||||||
|
pub services: Vec<ServiceData>,
|
||||||
|
pub backup: BackupData,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// System-level monitoring data
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct SystemData {
|
||||||
|
pub network: NetworkData,
|
||||||
|
pub cpu: CpuData,
|
||||||
|
pub memory: MemoryData,
|
||||||
|
pub storage: StorageData,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Network interface monitoring data
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct NetworkData {
|
||||||
|
pub interfaces: Vec<NetworkInterfaceData>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Individual network interface data
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct NetworkInterfaceData {
|
||||||
|
pub name: String,
|
||||||
|
pub ipv4_addresses: Vec<String>,
|
||||||
|
pub ipv6_addresses: Vec<String>,
|
||||||
|
pub is_physical: bool,
|
||||||
|
pub link_status: Status,
|
||||||
|
pub parent_interface: Option<String>,
|
||||||
|
pub vlan_id: Option<u16>,
|
||||||
|
pub connection_method: Option<String>, // For Tailscale: "direct", "relay", or "proxy"
|
||||||
|
}
|
||||||
|
|
||||||
|
/// CPU C-state usage information
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct CStateInfo {
|
||||||
|
pub name: String,
|
||||||
|
pub percent: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// CPU monitoring data
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct CpuData {
|
||||||
|
pub load_1min: f32,
|
||||||
|
pub load_5min: f32,
|
||||||
|
pub load_15min: f32,
|
||||||
|
pub cstates: Vec<CStateInfo>, // C-state usage percentages (C1, C6, C10, etc.) - indicates CPU idle depth distribution
|
||||||
|
pub temperature_celsius: Option<f32>,
|
||||||
|
pub load_status: Status,
|
||||||
|
pub temperature_status: Status,
|
||||||
|
// Static CPU information (collected once at startup)
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub model_name: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub core_count: Option<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Memory monitoring data
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct MemoryData {
|
||||||
|
pub usage_percent: f32,
|
||||||
|
pub total_gb: f32,
|
||||||
|
pub used_gb: f32,
|
||||||
|
pub available_gb: f32,
|
||||||
|
pub swap_total_gb: f32,
|
||||||
|
pub swap_used_gb: f32,
|
||||||
|
pub tmpfs: Vec<TmpfsData>,
|
||||||
|
pub usage_status: Status,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tmpfs filesystem data
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct TmpfsData {
|
||||||
|
pub mount: String,
|
||||||
|
pub usage_percent: f32,
|
||||||
|
pub used_gb: f32,
|
||||||
|
pub total_gb: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Storage monitoring data
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct StorageData {
|
||||||
|
pub drives: Vec<DriveData>,
|
||||||
|
pub pools: Vec<PoolData>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Individual drive data
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct DriveData {
|
||||||
|
pub name: String,
|
||||||
|
pub serial_number: Option<String>,
|
||||||
|
pub health: String,
|
||||||
|
pub temperature_celsius: Option<f32>,
|
||||||
|
pub wear_percent: Option<f32>,
|
||||||
|
pub filesystems: Vec<FilesystemData>,
|
||||||
|
pub temperature_status: Status,
|
||||||
|
pub health_status: Status,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Filesystem on a drive
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct FilesystemData {
|
||||||
|
pub mount: String,
|
||||||
|
pub usage_percent: f32,
|
||||||
|
pub used_gb: f32,
|
||||||
|
pub total_gb: f32,
|
||||||
|
pub usage_status: Status,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Storage pool (MergerFS, RAID, etc.)
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct PoolData {
|
||||||
|
pub name: String,
|
||||||
|
pub mount: String,
|
||||||
|
pub pool_type: String, // "mergerfs", "raid", etc.
|
||||||
|
pub health: String,
|
||||||
|
pub usage_percent: f32,
|
||||||
|
pub used_gb: f32,
|
||||||
|
pub total_gb: f32,
|
||||||
|
pub data_drives: Vec<PoolDriveData>,
|
||||||
|
pub parity_drives: Vec<PoolDriveData>,
|
||||||
|
pub health_status: Status,
|
||||||
|
pub usage_status: Status,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Drive in a storage pool
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct PoolDriveData {
|
||||||
|
pub name: String,
|
||||||
|
pub serial_number: Option<String>,
|
||||||
|
pub temperature_celsius: Option<f32>,
|
||||||
|
pub wear_percent: Option<f32>,
|
||||||
|
pub health: String,
|
||||||
|
pub health_status: Status,
|
||||||
|
pub temperature_status: Status,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Service monitoring data
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct ServiceData {
|
||||||
|
pub name: String,
|
||||||
|
pub user_stopped: bool,
|
||||||
|
pub service_status: Status,
|
||||||
|
pub sub_services: Vec<SubServiceData>,
|
||||||
|
/// Memory usage in bytes (from MemoryCurrent)
|
||||||
|
pub memory_bytes: Option<u64>,
|
||||||
|
/// Number of service restarts (from NRestarts)
|
||||||
|
pub restart_count: Option<u32>,
|
||||||
|
/// Uptime in seconds (calculated from ExecMainStartTimestamp)
|
||||||
|
pub uptime_seconds: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sub-service data (nginx sites, docker containers, etc.)
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct SubServiceData {
|
||||||
|
pub name: String,
|
||||||
|
pub service_status: Status,
|
||||||
|
pub metrics: Vec<SubServiceMetric>,
|
||||||
|
/// Type of sub-service: "nginx_site", "container", "image"
|
||||||
|
#[serde(default)]
|
||||||
|
pub service_type: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Individual metric for a sub-service
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct SubServiceMetric {
|
||||||
|
pub label: String,
|
||||||
|
pub value: f32,
|
||||||
|
pub unit: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Backup system data
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct BackupData {
|
||||||
|
pub last_backup_time: Option<String>,
|
||||||
|
pub backup_status: Status,
|
||||||
|
pub repositories: Vec<BackupRepositoryData>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Individual backup repository information
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct BackupRepositoryData {
|
||||||
|
pub name: String,
|
||||||
|
pub archive_count: i64,
|
||||||
|
pub repo_size_gb: f32,
|
||||||
|
pub status: Status,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AgentData {
|
||||||
|
/// Create new agent data with current timestamp
|
||||||
|
pub fn new(hostname: String, agent_version: String) -> Self {
|
||||||
|
Self {
|
||||||
|
hostname,
|
||||||
|
agent_version,
|
||||||
|
build_version: None,
|
||||||
|
timestamp: chrono::Utc::now().timestamp() as u64,
|
||||||
|
system: SystemData {
|
||||||
|
network: NetworkData {
|
||||||
|
interfaces: Vec::new(),
|
||||||
|
},
|
||||||
|
cpu: CpuData {
|
||||||
|
load_1min: 0.0,
|
||||||
|
load_5min: 0.0,
|
||||||
|
load_15min: 0.0,
|
||||||
|
cstates: Vec::new(),
|
||||||
|
temperature_celsius: None,
|
||||||
|
load_status: Status::Unknown,
|
||||||
|
temperature_status: Status::Unknown,
|
||||||
|
model_name: None,
|
||||||
|
core_count: None,
|
||||||
|
},
|
||||||
|
memory: MemoryData {
|
||||||
|
usage_percent: 0.0,
|
||||||
|
total_gb: 0.0,
|
||||||
|
used_gb: 0.0,
|
||||||
|
available_gb: 0.0,
|
||||||
|
swap_total_gb: 0.0,
|
||||||
|
swap_used_gb: 0.0,
|
||||||
|
tmpfs: Vec::new(),
|
||||||
|
usage_status: Status::Unknown,
|
||||||
|
},
|
||||||
|
storage: StorageData {
|
||||||
|
drives: Vec::new(),
|
||||||
|
pools: Vec::new(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
services: Vec::new(),
|
||||||
|
backup: BackupData {
|
||||||
|
last_backup_time: None,
|
||||||
|
backup_status: Status::Unknown,
|
||||||
|
repositories: Vec::new(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,8 +1,10 @@
|
|||||||
|
pub mod agent_data;
|
||||||
pub mod cache;
|
pub mod cache;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod metrics;
|
pub mod metrics;
|
||||||
pub mod protocol;
|
pub mod protocol;
|
||||||
|
|
||||||
|
pub use agent_data::*;
|
||||||
pub use cache::*;
|
pub use cache::*;
|
||||||
pub use error::*;
|
pub use error::*;
|
||||||
pub use metrics::*;
|
pub use metrics::*;
|
||||||
|
|||||||
@ -82,11 +82,14 @@ impl MetricValue {
|
|||||||
/// Health status for metrics
|
/// Health status for metrics
|
||||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
pub enum Status {
|
pub enum Status {
|
||||||
Ok,
|
Info, // Lowest priority - informational data with no status (no icon)
|
||||||
Pending,
|
Inactive, //
|
||||||
Warning,
|
Unknown, //
|
||||||
Critical,
|
Offline, //
|
||||||
Unknown,
|
Pending, //
|
||||||
|
Ok, // Good status has higher priority than unknown states
|
||||||
|
Warning, //
|
||||||
|
Critical, // Highest priority
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Status {
|
impl Status {
|
||||||
@ -129,6 +132,17 @@ impl HysteresisThresholds {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Evaluate value against thresholds to determine status
|
||||||
|
pub fn evaluate(&self, value: f32) -> Status {
|
||||||
|
if value >= self.critical_high {
|
||||||
|
Status::Critical
|
||||||
|
} else if value >= self.warning_high {
|
||||||
|
Status::Warning
|
||||||
|
} else {
|
||||||
|
Status::Ok
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn with_custom_gaps(warning_high: f32, warning_gap: f32, critical_high: f32, critical_gap: f32) -> Self {
|
pub fn with_custom_gaps(warning_high: f32, warning_gap: f32, critical_high: f32, critical_gap: f32) -> Self {
|
||||||
Self {
|
Self {
|
||||||
warning_high,
|
warning_high,
|
||||||
@ -180,6 +194,16 @@ impl HysteresisThresholds {
|
|||||||
Status::Ok
|
Status::Ok
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Status::Inactive => {
|
||||||
|
// Inactive services use normal thresholds like first measurement
|
||||||
|
if value >= self.critical_high {
|
||||||
|
Status::Critical
|
||||||
|
} else if value >= self.warning_high {
|
||||||
|
Status::Warning
|
||||||
|
} else {
|
||||||
|
Status::Ok
|
||||||
|
}
|
||||||
|
}
|
||||||
Status::Pending => {
|
Status::Pending => {
|
||||||
// Service transitioning, use normal thresholds like first measurement
|
// Service transitioning, use normal thresholds like first measurement
|
||||||
if value >= self.critical_high {
|
if value >= self.critical_high {
|
||||||
@ -190,6 +214,27 @@ impl HysteresisThresholds {
|
|||||||
Status::Ok
|
Status::Ok
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Status::Offline => {
|
||||||
|
// Host coming back online, use normal thresholds like first measurement
|
||||||
|
if value >= self.critical_high {
|
||||||
|
Status::Critical
|
||||||
|
} else if value >= self.warning_high {
|
||||||
|
Status::Warning
|
||||||
|
} else {
|
||||||
|
Status::Ok
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Status::Info => {
|
||||||
|
// Informational data shouldn't be used with hysteresis calculations
|
||||||
|
// Treat like Unknown if it somehow ends up here
|
||||||
|
if value >= self.critical_high {
|
||||||
|
Status::Critical
|
||||||
|
} else if value >= self.warning_high {
|
||||||
|
Status::Warning
|
||||||
|
} else {
|
||||||
|
Status::Ok
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,20 +1,31 @@
|
|||||||
use crate::metrics::Metric;
|
use crate::agent_data::AgentData;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
/// Message sent from agent to dashboard via ZMQ
|
/// Message sent from agent to dashboard via ZMQ
|
||||||
|
/// Always structured data - no legacy metrics support
|
||||||
|
pub type AgentMessage = AgentData;
|
||||||
|
|
||||||
|
/// Command output streaming message
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct MetricMessage {
|
pub struct CommandOutputMessage {
|
||||||
pub hostname: String,
|
pub hostname: String,
|
||||||
|
pub command_id: String,
|
||||||
|
pub command_type: String,
|
||||||
|
pub output_line: String,
|
||||||
|
pub is_complete: bool,
|
||||||
pub timestamp: u64,
|
pub timestamp: u64,
|
||||||
pub metrics: Vec<Metric>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MetricMessage {
|
|
||||||
pub fn new(hostname: String, metrics: Vec<Metric>) -> Self {
|
impl CommandOutputMessage {
|
||||||
|
pub fn new(hostname: String, command_id: String, command_type: String, output_line: String, is_complete: bool) -> Self {
|
||||||
Self {
|
Self {
|
||||||
hostname,
|
hostname,
|
||||||
|
command_id,
|
||||||
|
command_type,
|
||||||
|
output_line,
|
||||||
|
is_complete,
|
||||||
timestamp: chrono::Utc::now().timestamp() as u64,
|
timestamp: chrono::Utc::now().timestamp() as u64,
|
||||||
metrics,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -35,8 +46,8 @@ pub enum Command {
|
|||||||
pub enum CommandResponse {
|
pub enum CommandResponse {
|
||||||
/// Acknowledgment of command
|
/// Acknowledgment of command
|
||||||
Ack,
|
Ack,
|
||||||
/// Metrics response
|
/// Agent data response
|
||||||
Metrics(Vec<Metric>),
|
AgentData(AgentData),
|
||||||
/// Pong response to ping
|
/// Pong response to ping
|
||||||
Pong,
|
Pong,
|
||||||
/// Error response
|
/// Error response
|
||||||
@ -52,17 +63,18 @@ pub struct MessageEnvelope {
|
|||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
pub enum MessageType {
|
pub enum MessageType {
|
||||||
Metrics,
|
AgentData,
|
||||||
Command,
|
Command,
|
||||||
CommandResponse,
|
CommandResponse,
|
||||||
|
CommandOutput,
|
||||||
Heartbeat,
|
Heartbeat,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MessageEnvelope {
|
impl MessageEnvelope {
|
||||||
pub fn metrics(message: MetricMessage) -> Result<Self, crate::SharedError> {
|
pub fn agent_data(data: AgentData) -> Result<Self, crate::SharedError> {
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
message_type: MessageType::Metrics,
|
message_type: MessageType::AgentData,
|
||||||
payload: serde_json::to_vec(&message)?,
|
payload: serde_json::to_vec(&data)?,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,6 +92,13 @@ impl MessageEnvelope {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn command_output(message: CommandOutputMessage) -> Result<Self, crate::SharedError> {
|
||||||
|
Ok(Self {
|
||||||
|
message_type: MessageType::CommandOutput,
|
||||||
|
payload: serde_json::to_vec(&message)?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
pub fn heartbeat() -> Result<Self, crate::SharedError> {
|
pub fn heartbeat() -> Result<Self, crate::SharedError> {
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
message_type: MessageType::Heartbeat,
|
message_type: MessageType::Heartbeat,
|
||||||
@ -87,11 +106,11 @@ impl MessageEnvelope {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn decode_metrics(&self) -> Result<MetricMessage, crate::SharedError> {
|
pub fn decode_agent_data(&self) -> Result<AgentData, crate::SharedError> {
|
||||||
match self.message_type {
|
match self.message_type {
|
||||||
MessageType::Metrics => Ok(serde_json::from_slice(&self.payload)?),
|
MessageType::AgentData => Ok(serde_json::from_slice(&self.payload)?),
|
||||||
_ => Err(crate::SharedError::Protocol {
|
_ => Err(crate::SharedError::Protocol {
|
||||||
message: "Expected metrics message".to_string(),
|
message: "Expected agent data message".to_string(),
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -113,4 +132,13 @@ impl MessageEnvelope {
|
|||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn decode_command_output(&self) -> Result<CommandOutputMessage, crate::SharedError> {
|
||||||
|
match self.message_type {
|
||||||
|
MessageType::CommandOutput => Ok(serde_json::from_slice(&self.payload)?),
|
||||||
|
_ => Err(crate::SharedError::Protocol {
|
||||||
|
message: "Expected command output message".to_string(),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user