Remove hardcoded defaults, require configuration file

- Remove all Default implementations from agent configuration structs
- Make configuration file required for agent startup
- Update NixOS module to generate complete agent.toml configuration
- Add comprehensive configuration options to NixOS module including:
  - Service include/exclude patterns for systemd collector
  - All thresholds and intervals
  - ZMQ communication settings
  - Notification and cache configuration
- Agent now fails fast if no configuration provided
- Eliminates configuration drift between defaults and NixOS settings
This commit is contained in:
Christoffer Martinsson 2025-10-21 00:01:26 +02:00
parent 1e8da8c187
commit a937032eb1
6 changed files with 46 additions and 347 deletions

View File

@ -311,8 +311,12 @@ Agent → ["cpu_load_1min", "memory_usage_percent", ...] → Dashboard → Widge
- CLI: `cm-dashboard-agent [-v]` (intelligent caching enabled)
**Service Discovery:**
- Scans running systemd services
- Filters by predefined interesting patterns (gitea, nginx, docker, etc.)
- Scans ALL systemd services (active, inactive, failed, dead, etc.) using `systemctl list-unit-files` and `systemctl list-units --all`
- Discovers both system services and user services per host:
  - steambox/cmbox: reads system + cm user services
  - simonbox: reads system + simon user services
  - any other host: reads system + cm user services (default)
- Filters by service_name_filters patterns (gitea, nginx, docker, sunshine, etc.)
- Excludes maintenance services (docker-prune, sshd@, ark-permissions, etc.)
- No host-specific hardcoded service lists
### Current Implementation Status

View File

@ -23,12 +23,9 @@ impl Agent {
let hostname = gethostname().to_string_lossy().to_string();
info!("Initializing agent for host: {}", hostname);
// Load configuration
let config = if let Some(path) = config_path {
AgentConfig::load_from_file(&path)?
} else {
AgentConfig::default()
};
// Load configuration (now required)
let config_path = config_path.ok_or_else(|| anyhow::anyhow!("Configuration file path is required"))?;
let config = AgentConfig::from_file(&config_path)?;
info!("Agent configuration loaded");

View File

@ -128,8 +128,19 @@ impl SystemdCollector {
.arg("--plain")
.output()?;
// Also get user unit files (user-level services)
let user_unit_files_output = Command::new("systemctl")
// Get hostname to determine which user to check
let hostname = gethostname::gethostname().to_string_lossy().to_string();
let target_user = match hostname.as_str() {
"steambox" | "cmbox" => "cm",
"simonbox" => "simon",
_ => "cm", // default to cm for unknown hosts
};
// Also get user unit files (user-level services) for target user
let user_unit_files_output = Command::new("sudo")
.arg("-u")
.arg(target_user)
.arg("systemctl")
.arg("--user")
.arg("list-unit-files")
.arg("--type=service")
@ -137,8 +148,11 @@ impl SystemdCollector {
.arg("--plain")
.output()?;
// And user loaded units
let user_units_output = Command::new("systemctl")
// And user loaded units for target user
let user_units_output = Command::new("sudo")
.arg("-u")
.arg(target_user)
.arg("systemctl")
.arg("--user")
.arg("list-units")
.arg("--type=service")

View File

@ -1,54 +1,2 @@
// Collection intervals
pub const DEFAULT_COLLECTION_INTERVAL_SECONDS: u64 = 2;
pub const DEFAULT_CPU_INTERVAL_SECONDS: u64 = 5;
pub const DEFAULT_MEMORY_INTERVAL_SECONDS: u64 = 5;
pub const DEFAULT_DISK_INTERVAL_SECONDS: u64 = 300; // 5 minutes
pub const DEFAULT_PROCESS_INTERVAL_SECONDS: u64 = 30;
pub const DEFAULT_SYSTEMD_INTERVAL_SECONDS: u64 = 30;
pub const DEFAULT_SMART_INTERVAL_SECONDS: u64 = 900; // 15 minutes
pub const DEFAULT_BACKUP_INTERVAL_SECONDS: u64 = 900; // 15 minutes
pub const DEFAULT_NETWORK_INTERVAL_SECONDS: u64 = 30;
// ZMQ configuration
pub const DEFAULT_ZMQ_PUBLISHER_PORT: u16 = 6130;
pub const DEFAULT_ZMQ_COMMAND_PORT: u16 = 6131;
pub const DEFAULT_ZMQ_BIND_ADDRESS: &str = "0.0.0.0";
pub const DEFAULT_ZMQ_TIMEOUT_MS: u64 = 5000;
pub const DEFAULT_ZMQ_HEARTBEAT_INTERVAL_MS: u64 = 30000;
// CPU thresholds (production values from legacy)
pub const DEFAULT_CPU_LOAD_WARNING: f32 = 9.0;
pub const DEFAULT_CPU_LOAD_CRITICAL: f32 = 10.0;
pub const DEFAULT_CPU_TEMP_WARNING: f32 = 100.0; // Effectively disabled
pub const DEFAULT_CPU_TEMP_CRITICAL: f32 = 100.0; // Effectively disabled
// Memory thresholds (from legacy)
pub const DEFAULT_MEMORY_WARNING_PERCENT: f32 = 80.0;
pub const DEFAULT_MEMORY_CRITICAL_PERCENT: f32 = 95.0;
// Disk thresholds
pub const DEFAULT_DISK_WARNING_PERCENT: f32 = 80.0;
pub const DEFAULT_DISK_CRITICAL_PERCENT: f32 = 90.0;
// Process configuration
pub const DEFAULT_TOP_PROCESSES_COUNT: usize = 10;
// Service thresholds
pub const DEFAULT_SERVICE_MEMORY_WARNING_MB: f32 = 1000.0;
pub const DEFAULT_SERVICE_MEMORY_CRITICAL_MB: f32 = 2000.0;
// SMART thresholds
pub const DEFAULT_SMART_TEMP_WARNING: f32 = 60.0;
pub const DEFAULT_SMART_TEMP_CRITICAL: f32 = 70.0;
pub const DEFAULT_SMART_WEAR_WARNING: f32 = 80.0;
pub const DEFAULT_SMART_WEAR_CRITICAL: f32 = 90.0;
// Backup configuration
pub const DEFAULT_BACKUP_MAX_AGE_HOURS: u64 = 48;
// Notification configuration (from legacy)
pub const DEFAULT_SMTP_HOST: &str = "localhost";
pub const DEFAULT_SMTP_PORT: u16 = 25;
pub const DEFAULT_FROM_EMAIL: &str = "{hostname}@cmtec.se";
pub const DEFAULT_TO_EMAIL: &str = "cm@cmtec.se";
pub const DEFAULT_NOTIFICATION_RATE_LIMIT_MINUTES: u64 = 0;
// This file intentionally defines no constants - all configuration values come from config files
// No hardcoded defaults are used

View File

@ -3,12 +3,9 @@ use cm_dashboard_shared::CacheConfig;
use serde::{Deserialize, Serialize};
use std::path::Path;
pub mod defaults;
pub mod loader;
pub mod validation;
use defaults::*;
/// Main agent configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentConfig {
@ -58,7 +55,9 @@ pub struct CpuConfig {
pub struct MemoryConfig {
/// NOTE(review): presumably switches the memory collector on or off - confirm against the collector code
pub enabled: bool,
/// Collection interval for this collector, in seconds
pub interval_seconds: u64,
/// Memory usage warning threshold (percentage, written on a 0-100 scale, e.g. 80.0)
pub usage_warning_percent: f32,
/// Memory usage critical threshold (percentage, written on a 0-100 scale, e.g. 95.0)
pub usage_critical_percent: f32,
}
@ -67,19 +66,22 @@ pub struct MemoryConfig {
pub struct DiskConfig {
/// NOTE(review): presumably switches the disk collector on or off - confirm against the collector code
pub enabled: bool,
/// Collection interval for this collector, in seconds
pub interval_seconds: u64,
/// Disk usage warning threshold (percentage, written on a 0-100 scale, e.g. 80.0)
pub usage_warning_percent: f32,
/// Disk usage critical threshold (percentage, written on a 0-100 scale, e.g. 90.0)
pub usage_critical_percent: f32,
/// Filesystem configurations, one `FilesystemConfig` entry per filesystem.
/// There is no longer a built-in per-host list, so entries must be supplied by the configuration.
pub filesystems: Vec<FilesystemConfig>,
}
/// Filesystem configuration entry
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FilesystemConfig {
pub name: String, // Human-readable name (e.g., "root", "boot", "home")
pub uuid: String, // UUID for /dev/disk/by-uuid/ resolution
pub mount_point: String, // Expected mount point (e.g., "/", "/boot")
pub fs_type: String, // Filesystem type (e.g., "ext4", "vfat")
pub monitor: bool, // Whether to monitor this filesystem
pub name: String,
pub uuid: String,
pub mount_point: String,
pub fs_type: String,
pub monitor: bool,
}
/// Process collector configuration
@ -95,8 +97,8 @@ pub struct ProcessConfig {
pub struct SystemdConfig {
pub enabled: bool,
pub interval_seconds: u64,
pub auto_discover: bool,
pub services: Vec<String>,
pub service_name_filters: Vec<String>,
pub excluded_services: Vec<String>,
pub memory_warning_mb: f32,
pub memory_critical_mb: f32,
}
@ -126,8 +128,6 @@ pub struct BackupConfig {
pub struct NetworkConfig {
pub enabled: bool,
pub interval_seconds: u64,
pub interfaces: Vec<String>,
pub auto_discover: bool,
}
/// Notification configuration
@ -142,275 +142,11 @@ pub struct NotificationConfig {
}
impl AgentConfig {
pub fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
loader::load_config(path)
}
pub fn validate(&self) -> Result<()> {
validation::validate_config(self)
}
}
impl Default for AgentConfig {
fn default() -> Self {
Self {
zmq: ZmqConfig::default(),
collectors: CollectorConfig::default(),
cache: CacheConfig::default(),
notifications: NotificationConfig::default(),
collection_interval_seconds: DEFAULT_COLLECTION_INTERVAL_SECONDS,
}
}
}
impl Default for ZmqConfig {
fn default() -> Self {
Self {
publisher_port: DEFAULT_ZMQ_PUBLISHER_PORT,
command_port: DEFAULT_ZMQ_COMMAND_PORT,
bind_address: DEFAULT_ZMQ_BIND_ADDRESS.to_string(),
timeout_ms: DEFAULT_ZMQ_TIMEOUT_MS,
heartbeat_interval_ms: DEFAULT_ZMQ_HEARTBEAT_INTERVAL_MS,
}
}
}
impl Default for CollectorConfig {
fn default() -> Self {
Self {
cpu: CpuConfig::default(),
memory: MemoryConfig::default(),
disk: DiskConfig::default(),
processes: ProcessConfig::default(),
systemd: SystemdConfig::default(),
smart: SmartConfig::default(),
backup: BackupConfig::default(),
network: NetworkConfig::default(),
}
}
}
impl Default for CpuConfig {
fn default() -> Self {
Self {
enabled: true,
interval_seconds: DEFAULT_CPU_INTERVAL_SECONDS,
load_warning_threshold: DEFAULT_CPU_LOAD_WARNING,
load_critical_threshold: DEFAULT_CPU_LOAD_CRITICAL,
temperature_warning_threshold: DEFAULT_CPU_TEMP_WARNING,
temperature_critical_threshold: DEFAULT_CPU_TEMP_CRITICAL,
}
}
}
impl Default for MemoryConfig {
fn default() -> Self {
Self {
enabled: true,
interval_seconds: DEFAULT_MEMORY_INTERVAL_SECONDS,
usage_warning_percent: DEFAULT_MEMORY_WARNING_PERCENT,
usage_critical_percent: DEFAULT_MEMORY_CRITICAL_PERCENT,
}
}
}
impl Default for DiskConfig {
fn default() -> Self {
let hostname = gethostname::gethostname().to_string_lossy().to_string();
let filesystems = get_default_filesystems_for_host(&hostname);
Self {
enabled: true,
interval_seconds: DEFAULT_DISK_INTERVAL_SECONDS,
usage_warning_percent: DEFAULT_DISK_WARNING_PERCENT,
usage_critical_percent: DEFAULT_DISK_CRITICAL_PERCENT,
filesystems,
}
}
}
/// Get default filesystem configurations for known CMTEC hosts
fn get_default_filesystems_for_host(hostname: &str) -> Vec<FilesystemConfig> {
match hostname {
"cmbox" => vec![
FilesystemConfig {
name: "root".to_string(),
uuid: "4cade5ce-85a5-4a03-83c8-dfd1d3888d79".to_string(),
mount_point: "/".to_string(),
fs_type: "ext4".to_string(),
monitor: true,
},
FilesystemConfig {
name: "boot".to_string(),
uuid: "AB4D-62EC".to_string(),
mount_point: "/boot".to_string(),
fs_type: "vfat".to_string(),
monitor: true,
},
],
"srv02" => vec![
FilesystemConfig {
name: "root".to_string(),
uuid: "5a880608-c79f-458f-a031-30206aa27ca7".to_string(),
mount_point: "/".to_string(),
fs_type: "ext4".to_string(),
monitor: true,
},
FilesystemConfig {
name: "boot".to_string(),
uuid: "6B2E-2AD9".to_string(),
mount_point: "/boot".to_string(),
fs_type: "vfat".to_string(),
monitor: true,
},
],
"simonbox" => vec![
FilesystemConfig {
name: "root".to_string(),
uuid: "b74284a9-2899-4f71-bdb0-fd07dc4baab3".to_string(),
mount_point: "/".to_string(),
fs_type: "ext4".to_string(),
monitor: true,
},
FilesystemConfig {
name: "boot".to_string(),
uuid: "F6A3-AD2B".to_string(),
mount_point: "/boot".to_string(),
fs_type: "vfat".to_string(),
monitor: true,
},
FilesystemConfig {
name: "steampool_1".to_string(),
uuid: "09300cb7-0938-4dba-8a42-7a7aaf60db51".to_string(),
mount_point: "/steampool_1".to_string(),
fs_type: "ext4".to_string(),
monitor: true,
},
FilesystemConfig {
name: "steampool_2".to_string(),
uuid: "a2d61a41-3f2a-4760-b62e-5eb8caf50d1a".to_string(),
mount_point: "/steampool_2".to_string(),
fs_type: "ext4".to_string(),
monitor: true,
},
],
"steambox" => vec![
FilesystemConfig {
name: "root".to_string(),
uuid: "4514ca9f-2d0a-40df-b14b-e342f39c3e6a".to_string(),
mount_point: "/".to_string(),
fs_type: "ext4".to_string(),
monitor: true,
},
FilesystemConfig {
name: "boot".to_string(),
uuid: "8FD2-1B13".to_string(),
mount_point: "/boot".to_string(),
fs_type: "vfat".to_string(),
monitor: true,
},
FilesystemConfig {
name: "steampool".to_string(),
uuid: "0ebe8abb-bbe7-4224-947b-86bf38981f60".to_string(),
mount_point: "/mnt/steampool".to_string(),
fs_type: "ext4".to_string(),
monitor: true,
},
],
"srv01" => vec![
FilesystemConfig {
name: "root".to_string(),
uuid: "cd98df34-03a3-4d68-8338-d90d2920f9f8".to_string(),
mount_point: "/".to_string(),
fs_type: "ext4".to_string(),
monitor: true,
},
FilesystemConfig {
name: "boot".to_string(),
uuid: "13E1-4DDE".to_string(),
mount_point: "/boot".to_string(),
fs_type: "vfat".to_string(),
monitor: true,
},
],
// labbox and wslbox have no UUIDs configured yet
"labbox" | "wslbox" => {
Vec::new()
},
_ => {
// Unknown hosts use auto-discovery
Vec::new()
}
}
}
impl Default for ProcessConfig {
fn default() -> Self {
Self {
enabled: true,
interval_seconds: DEFAULT_PROCESS_INTERVAL_SECONDS,
top_processes_count: DEFAULT_TOP_PROCESSES_COUNT,
}
}
}
impl Default for SystemdConfig {
fn default() -> Self {
Self {
enabled: true,
interval_seconds: DEFAULT_SYSTEMD_INTERVAL_SECONDS,
auto_discover: true,
services: Vec::new(),
memory_warning_mb: DEFAULT_SERVICE_MEMORY_WARNING_MB,
memory_critical_mb: DEFAULT_SERVICE_MEMORY_CRITICAL_MB,
}
}
}
impl Default for SmartConfig {
fn default() -> Self {
Self {
enabled: true,
interval_seconds: DEFAULT_SMART_INTERVAL_SECONDS,
temperature_warning_celsius: DEFAULT_SMART_TEMP_WARNING,
temperature_critical_celsius: DEFAULT_SMART_TEMP_CRITICAL,
wear_warning_percent: DEFAULT_SMART_WEAR_WARNING,
wear_critical_percent: DEFAULT_SMART_WEAR_CRITICAL,
}
}
}
impl Default for BackupConfig {
fn default() -> Self {
Self {
enabled: true,
interval_seconds: DEFAULT_BACKUP_INTERVAL_SECONDS,
backup_paths: Vec::new(),
max_age_hours: DEFAULT_BACKUP_MAX_AGE_HOURS,
}
}
}
impl Default for NetworkConfig {
fn default() -> Self {
Self {
enabled: true,
interval_seconds: DEFAULT_NETWORK_INTERVAL_SECONDS,
interfaces: Vec::new(),
auto_discover: true,
}
}
}
impl Default for NotificationConfig {
fn default() -> Self {
Self {
enabled: true,
smtp_host: DEFAULT_SMTP_HOST.to_string(),
smtp_port: DEFAULT_SMTP_PORT,
from_email: DEFAULT_FROM_EMAIL.to_string(),
to_email: DEFAULT_TO_EMAIL.to_string(),
rate_limit_minutes: DEFAULT_NOTIFICATION_RATE_LIMIT_MINUTES,
}
}
}
}

View File

@ -23,9 +23,9 @@ struct Cli {
#[arg(short, long, action = clap::ArgAction::Count)]
verbose: u8,
/// Configuration file path
/// Configuration file path (required)
#[arg(short, long)]
config: Option<String>,
config: String,
}
#[tokio::main]
@ -46,7 +46,7 @@ async fn main() -> Result<()> {
info!("CM Dashboard Agent starting with individual metrics architecture...");
// Create and run agent
let mut agent = Agent::new(cli.config).await?;
let mut agent = Agent::new(Some(cli.config)).await?;
// Setup graceful shutdown channel
let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel();