Enhance disk collector with individual drive health monitoring

- Add StoragePool and DriveInfo structures for grouping drives by mount point
- Implement SMART data collection for individual drives (health, temperature, wear)
- Support ext4, zfs, xfs, mergerfs, and btrfs filesystem types
- Generate individual drive metrics: disk_[pool]_[drive]_health/temperature/wear
- Add storage_type and underlying_devices to filesystem configuration
- Move hardcoded service directory mappings to NixOS configuration
- Move hardcoded host-to-user mapping to NixOS configuration
- Remove all unused code and fix compilation warnings
- Clean implementation with zero warnings and no dead code

Individual drives now show health status per storage pool:
Storage root (ext4): nvme0n1 PASSED 42°C 5% wear
Storage steampool (mergerfs): sda/sdb/sdc with individual health data
This commit is contained in:
2025-10-22 19:59:25 +02:00
parent a6c2983f65
commit 08d3454683
4 changed files with 332 additions and 433 deletions

View File

@@ -7,11 +7,14 @@ use std::time::Instant;
use tracing::debug;
use super::{Collector, CollectorError};
use crate::config::SystemdConfig;
/// Systemd collector for monitoring systemd services.
///
/// Combines a lock-protected cache of discovered service state with the
/// configuration that drives discovery and disk-usage lookups.
pub struct SystemdCollector {
/// Cached state with thread-safe interior mutability
state: RwLock<ServiceCacheState>,
/// Configuration for service monitoring: supplies the target user
/// (`host_user_mapping`), the `excluded_services` and `service_name_filters`
/// pattern lists, and the per-service `service_directories` map.
config: SystemdConfig,
}
/// Internal state for service caching
@@ -32,7 +35,7 @@ struct ServiceCacheState {
}
impl SystemdCollector {
pub fn new() -> Self {
pub fn new(config: SystemdConfig) -> Self {
Self {
state: RwLock::new(ServiceCacheState {
monitored_services: Vec::new(),
@@ -42,6 +45,7 @@ impl SystemdCollector {
last_nginx_check_time: None,
nginx_check_interval_seconds: 30, // 30 seconds for nginx sites
}),
config,
}
}
@@ -128,13 +132,8 @@ impl SystemdCollector {
.arg("--plain")
.output()?;
// Get hostname to determine which user to check
let hostname = gethostname::gethostname().to_string_lossy().to_string();
let target_user = match hostname.as_str() {
"steambox" | "cmbox" => "cm",
"simonbox" => "simon",
_ => "cm", // default to cm for unknown hosts
};
// Use configured user mapping instead of hardcoded hostname logic
let target_user = &self.config.host_user_mapping;
// Also get user unit files (user-level services) for target user
let user_unit_files_output = Command::new("sudo")
@@ -183,68 +182,9 @@ impl SystemdCollector {
};
let mut services = Vec::new();
// Skip setup/certificate services that don't need monitoring (from legacy)
let excluded_services = [
"mosquitto-certs",
"immich-setup",
"phpfpm-kryddorten",
"phpfpm-mariehall2",
"acme-haasp.net",
"acme-selfsigned-haasp",
"borgbackup",
"haasp-site-deploy",
"mosquitto-backup",
"nginx-config-reload",
"sshd-keygen",
"sshd-unix-local@",
"sshd@",
"docker-prune",
"docker-registry-gar",
"ark-permissions",
];
// Define patterns for services we want to monitor (from legacy)
let service_name_filters = [
// Web applications
"gitea",
"immich",
"vaultwarden",
"unifi",
"wordpress",
"nginx",
"httpd",
// Databases
"postgresql",
"mysql",
"mariadb",
"redis",
"mongodb",
"mongod",
// Backup and storage
"borg",
"rclone",
// Container runtimes
"docker",
// CI/CD services
"gitea-actions",
"gitea-runner",
"actions-runner",
// Network services
"sshd",
"dnsmasq",
// MQTT and IoT services
"mosquitto",
"mqtt",
// PHP-FPM services
"phpfpm",
// Home automation
"haasp",
// Backup services
"backup",
// Game servers
"ark",
"sunshine",
];
// Use configuration instead of hardcoded values
let excluded_services = &self.config.excluded_services;
let service_name_filters = &self.config.service_name_filters;
// Parse both unit files and loaded units
let mut all_service_names = std::collections::HashSet::new();
@@ -295,7 +235,7 @@ impl SystemdCollector {
// Skip excluded services first
let mut is_excluded = false;
for excluded in &excluded_services {
for excluded in excluded_services {
if service_name.contains(excluded) {
debug!(
"EXCLUDING service '{}' because it matches pattern '{}'",
@@ -312,7 +252,7 @@ impl SystemdCollector {
}
// Check if this service matches our filter patterns
for pattern in &service_name_filters {
for pattern in service_name_filters {
if service_name.contains(pattern) || pattern.contains(service_name) {
debug!(
"INCLUDING service '{}' because it matches pattern '{}'",
@@ -411,22 +351,21 @@ impl SystemdCollector {
}
}
/// Get service disk usage - simple and deterministic
/// Get service disk usage - simplified and configuration-driven
fn get_service_disk_usage(&self, service: &str) -> Option<f32> {
// 1. Check if service has defined directories
let defined_dirs = self.get_service_directories(service);
if !defined_dirs.is_empty() {
// Service has defined paths - use ONLY those
for dir in defined_dirs {
// 1. Check if service has configured directories (exact match only)
if let Some(dirs) = self.config.service_directories.get(service) {
// Service has configured paths - use the first accessible one
for dir in dirs {
if let Some(size) = self.get_directory_size(dir) {
return Some(size);
}
}
// If defined path failed, return None (shows as "-")
return None;
// If every configured path failed, report zero usage via Some(0.0) (shows as 0)
return Some(0.0);
}
// 2. No defined path - use systemctl WorkingDirectory
// 2. No configured path - use systemctl WorkingDirectory
let output = Command::new("systemctl")
.arg("show")
.arg(format!("{}.service", service))
@@ -447,36 +386,6 @@ impl SystemdCollector {
None
}
/// Look up the hard-coded data directories for a service (highest priority).
///
/// ARK server units are matched by exact name. Every other entry matches when
/// `service` contains the listed substring, and the first matching entry wins.
/// An empty vector means no directories are defined, in which case the caller
/// falls back to the systemctl WorkingDirectory.
fn get_service_directories(&self, service: &str) -> Vec<&str> {
    // Game servers (ARK Survival Ascended) - exact unit names, checked first.
    const EXACT_NAMES: &[(&str, &str)] = &[
        ("ark-island", "/var/lib/ark-servers/island"),
        ("ark-scorched", "/var/lib/ark-servers/scorched"),
        ("ark-center", "/var/lib/ark-servers/center"),
        ("ark-aberration", "/var/lib/ark-servers/aberration"),
        ("ark-extinction", "/var/lib/ark-servers/extinction"),
        ("ark-ragnarok", "/var/lib/ark-servers/ragnarok"),
        ("ark-valguero", "/var/lib/ark-servers/valguero"),
    ];

    // Other services with defined paths - substring match, order matters.
    const SUBSTRING_RULES: &[(&str, &[&str])] = &[
        ("docker", &["/var/lib/docker", "/var/lib/docker/containers"]),
        ("gitea", &["/var/lib/gitea", "/opt/gitea", "/home/git", "/data/gitea"]),
        ("nginx", &["/var/log/nginx", "/var/www", "/usr/share/nginx"]),
        ("immich", &["/var/lib/immich", "/opt/immich", "/usr/src/app/upload"]),
        ("postgres", &["/var/lib/postgresql", "/var/lib/postgres"]),
        ("mysql", &["/var/lib/mysql"]),
        ("redis", &["/var/lib/redis", "/var/redis"]),
        ("unifi", &["/var/lib/unifi", "/opt/UniFi"]),
        ("vaultwarden", &["/var/lib/vaultwarden", "/opt/vaultwarden"]),
        ("mosquitto", &["/var/lib/mosquitto", "/etc/mosquitto"]),
        ("postfix", &["/var/spool/postfix", "/var/lib/postfix"]),
    ];

    if let Some(&(_, dir)) = EXACT_NAMES.iter().find(|(name, _)| *name == service) {
        return vec![dir];
    }

    // No rule matched -> empty list, i.e. no defined path for this service.
    SUBSTRING_RULES
        .iter()
        .find(|(needle, _)| service.contains(*needle))
        .map(|(_, dirs)| dirs.to_vec())
        .unwrap_or_default()
}