Implement per-service disk usage monitoring

Replaced system-wide disk usage with accurate per-service tracking by scanning
service-specific directories. Services like sshd now correctly show minimal
disk usage instead of misleading system totals.

- Rename storage widget and add drive capacity/usage columns
- Move host display to main dashboard title for cleaner layout
- Replace separate alert displays with color-coded row highlighting
- Add per-service disk usage collection using du command
- Update services widget formatting to handle small disk values
- Restructure into workspace with dedicated agent and dashboard packages
Commit 2581435b10 (parent 82afe3d4f1)
Date: 2025-10-11 22:59:16 +02:00
30 changed files with 4801 additions and 446 deletions

View File

@@ -6,7 +6,10 @@ mod ui;
use std::fs;
use std::io::{self, Stdout};
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use std::sync::{
atomic::{AtomicBool, Ordering},
Arc, OnceLock,
};
use std::time::Duration;
use crate::data::metrics::{BackupMetrics, ServiceMetrics, SmartMetrics};
@@ -100,8 +103,14 @@ async fn main() -> Result<()> {
let mut app = App::new(options)?;
let (event_tx, mut event_rx) = unbounded_channel();
let shutdown_flag = Arc::new(AtomicBool::new(false));
let zmq_task = if let Some(context) = app.zmq_context() {
Some(spawn_metrics_task(context, event_tx.clone()))
Some(spawn_metrics_task(
context,
event_tx.clone(),
shutdown_flag.clone(),
))
} else {
None
};
@@ -109,9 +118,12 @@ async fn main() -> Result<()> {
let mut terminal = setup_terminal()?;
let result = run_app(&mut terminal, &mut app, &mut event_rx);
teardown_terminal(terminal)?;
shutdown_flag.store(true, Ordering::Relaxed);
let _ = event_tx.send(AppEvent::Shutdown);
if let Some(handle) = zmq_task {
handle.abort();
if let Err(join_error) = handle.await {
warn!(%join_error, "ZMQ metrics task ended unexpectedly");
}
}
result
}
@@ -206,9 +218,13 @@ fn prepare_log_writer() -> Result<tracing_appender::non_blocking::NonBlocking> {
Ok(non_blocking)
}
fn spawn_metrics_task(context: ZmqContext, sender: UnboundedSender<AppEvent>) -> JoinHandle<()> {
fn spawn_metrics_task(
context: ZmqContext,
sender: UnboundedSender<AppEvent>,
shutdown: Arc<AtomicBool>,
) -> JoinHandle<()> {
tokio::spawn(async move {
match spawn_blocking(move || metrics_blocking_loop(context, sender)).await {
match spawn_blocking(move || metrics_blocking_loop(context, sender, shutdown)).await {
Ok(Ok(())) => {}
Ok(Err(error)) => warn!(%error, "ZMQ metrics worker exited with error"),
Err(join_error) => warn!(%join_error, "ZMQ metrics worker panicked"),
@@ -216,12 +232,23 @@ fn spawn_metrics_task(context: ZmqContext, sender: UnboundedSender<AppEvent>) ->
})
}
fn metrics_blocking_loop(context: ZmqContext, sender: UnboundedSender<AppEvent>) -> Result<()> {
fn metrics_blocking_loop(
context: ZmqContext,
sender: UnboundedSender<AppEvent>,
shutdown: Arc<AtomicBool>,
) -> Result<()> {
let zmq_context = NativeZmqContext::new();
let socket = zmq_context
.socket(zmq::SUB)
.context("failed to create ZMQ SUB socket")?;
socket
.set_linger(0)
.context("failed to configure ZMQ linger")?;
socket
.set_rcvtimeo(1_000)
.context("failed to configure ZMQ receive timeout")?;
for endpoint in context.endpoints() {
debug!(%endpoint, "connecting to ZMQ endpoint");
socket
@@ -239,7 +266,7 @@ fn metrics_blocking_loop(context: ZmqContext, sender: UnboundedSender<AppEvent>)
.context("failed to subscribe to all ZMQ topics")?;
}
loop {
while !shutdown.load(Ordering::Relaxed) {
match socket.recv_msg(0) {
Ok(message) => {
if let Err(error) = handle_zmq_message(&message, &sender) {
@@ -247,11 +274,18 @@ fn metrics_blocking_loop(context: ZmqContext, sender: UnboundedSender<AppEvent>)
}
}
Err(error) => {
if error == zmq::Error::EAGAIN {
continue;
}
warn!(%error, "ZMQ receive error");
std::thread::sleep(Duration::from_secs(1));
std::thread::sleep(Duration::from_millis(250));
}
}
}
debug!("ZMQ metrics worker shutting down");
Ok(())
}
fn handle_zmq_message(
@@ -442,7 +476,7 @@ tick_rate_ms = 250
history_duration_minutes = 60
[[dashboard.widgets]]
id = "nvme"
id = "storage"
enabled = true
[[dashboard.widgets]]