445 lines
14 KiB
Rust
445 lines
14 KiB
Rust
use std::collections::HashSet;
|
|
use std::process::Stdio;
|
|
use tokio::fs;
|
|
use tokio::process::Command;
|
|
use tracing::{debug, warn};
|
|
|
|
use crate::collectors::CollectorError;
|
|
|
|
/// Stateless namespace for host auto-discovery.
///
/// All functionality is exposed as associated functions (no instance is
/// needed): detection of storage devices, of systemd services worth
/// monitoring, and of the backup configuration.
pub struct AutoDiscovery;
|
|
|
|
impl AutoDiscovery {
|
|
/// Auto-detect storage devices suitable for SMART monitoring
|
|
pub async fn discover_storage_devices() -> Vec<String> {
|
|
let mut devices = Vec::new();
|
|
|
|
// Method 1: Try lsblk to find block devices
|
|
if let Ok(lsblk_devices) = Self::discover_via_lsblk().await {
|
|
devices.extend(lsblk_devices);
|
|
}
|
|
|
|
// Method 2: Scan /dev for common device patterns
|
|
if devices.is_empty() {
|
|
if let Ok(dev_devices) = Self::discover_via_dev_scan().await {
|
|
devices.extend(dev_devices);
|
|
}
|
|
}
|
|
|
|
// Method 3: Fallback to common device names
|
|
if devices.is_empty() {
|
|
devices = Self::fallback_device_names();
|
|
}
|
|
|
|
// Remove duplicates and sort
|
|
let mut unique_devices: Vec<String> = devices
|
|
.into_iter()
|
|
.collect::<HashSet<_>>()
|
|
.into_iter()
|
|
.collect();
|
|
unique_devices.sort();
|
|
|
|
debug!("Auto-detected storage devices: {:?}", unique_devices);
|
|
unique_devices
|
|
}
|
|
|
|
async fn discover_via_lsblk() -> Result<Vec<String>, CollectorError> {
|
|
let output = Command::new("/run/current-system/sw/bin/lsblk")
|
|
.args(["-d", "-o", "NAME,TYPE", "-n", "-r"])
|
|
.stdout(Stdio::piped())
|
|
.stderr(Stdio::piped())
|
|
.output()
|
|
.await
|
|
.map_err(|e| CollectorError::CommandFailed {
|
|
command: "lsblk".to_string(),
|
|
message: e.to_string(),
|
|
})?;
|
|
|
|
if !output.status.success() {
|
|
return Err(CollectorError::CommandFailed {
|
|
command: "lsblk".to_string(),
|
|
message: String::from_utf8_lossy(&output.stderr).to_string(),
|
|
});
|
|
}
|
|
|
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
let mut devices = Vec::new();
|
|
|
|
for line in stdout.lines() {
|
|
let parts: Vec<&str> = line.split_whitespace().collect();
|
|
if parts.len() >= 2 {
|
|
let device_name = parts[0];
|
|
let device_type = parts[1];
|
|
|
|
// Include disk type devices and filter out unwanted ones
|
|
if device_type == "disk" && Self::is_suitable_device(device_name) {
|
|
devices.push(device_name.to_string());
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(devices)
|
|
}
|
|
|
|
async fn discover_via_dev_scan() -> Result<Vec<String>, CollectorError> {
|
|
let mut devices = Vec::new();
|
|
|
|
// Read /dev directory
|
|
let mut dev_entries = fs::read_dir("/dev")
|
|
.await
|
|
.map_err(|e| CollectorError::IoError {
|
|
message: e.to_string(),
|
|
})?;
|
|
|
|
while let Some(entry) =
|
|
dev_entries
|
|
.next_entry()
|
|
.await
|
|
.map_err(|e| CollectorError::IoError {
|
|
message: e.to_string(),
|
|
})?
|
|
{
|
|
let file_name = entry.file_name();
|
|
let device_name = file_name.to_string_lossy();
|
|
|
|
if Self::is_suitable_device(&device_name) {
|
|
devices.push(device_name.to_string());
|
|
}
|
|
}
|
|
|
|
Ok(devices)
|
|
}
|
|
|
|
/// Returns `true` when `device_name` looks like a whole-disk device name.
///
/// Accepted shapes:
/// - `nvme<digits>n<digits>` (an NVMe namespace such as `nvme0n1`),
/// - `sd`, `hd` or `vd` followed by one or more lowercase ASCII letters
///   (`sda`, `hdb`, `vda`, and also `sdaa`).
///
/// Everything else is rejected, in particular partitions (`sda1`,
/// `nvme0n1p1`) and names that merely start with `nvme` (`nvme0`,
/// `nvme-fabrics`, `nvme0c0n1`).
fn is_suitable_device(device_name: &str) -> bool {
    /// Non-empty and made of ASCII digits only.
    fn all_digits(text: &str) -> bool {
        !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit())
    }

    /// Matches exactly `nvme<digits>n<digits>`.
    fn is_nvme_namespace(name: &str) -> bool {
        name.strip_prefix("nvme")
            .and_then(|rest| rest.split_once('n'))
            .map_or(false, |(controller, namespace)| {
                all_digits(controller) && all_digits(namespace)
            })
    }

    /// Matches `<prefix>` followed only by lowercase letters (at least one).
    fn is_lettered_disk(name: &str, prefix: &str) -> bool {
        name.strip_prefix(prefix).map_or(false, |suffix| {
            !suffix.is_empty() && suffix.bytes().all(|b| b.is_ascii_lowercase())
        })
    }

    is_nvme_namespace(device_name)
        || ["sd", "hd", "vd"]
            .iter()
            .any(|prefix| is_lettered_disk(device_name, prefix))
}
|
|
|
|
/// Conventional device names used when no detection method found anything.
fn fallback_device_names() -> Vec<String> {
    ["nvme0n1", "sda", "sdb"]
        .iter()
        .map(|name| String::from(*name))
        .collect()
}
|
|
|
|
/// Auto-detect systemd services suitable for monitoring
|
|
pub async fn discover_services() -> Vec<String> {
|
|
let mut services = Vec::new();
|
|
|
|
// Method 1: Try to find running services
|
|
if let Ok(running_services) = Self::discover_running_services().await {
|
|
services.extend(running_services);
|
|
}
|
|
|
|
// Method 2: Add host-specific services based on hostname
|
|
let hostname = gethostname::gethostname().to_string_lossy().to_string();
|
|
services.extend(Self::get_host_specific_services(&hostname));
|
|
|
|
// Normalize aliases and verify the units actually exist before deduping
|
|
let canonicalized: Vec<String> = services
|
|
.into_iter()
|
|
.filter_map(|svc| Self::canonical_service_name(&svc))
|
|
.collect();
|
|
|
|
let existing = Self::filter_existing_services(&canonicalized).await;
|
|
|
|
let mut unique_services: Vec<String> = existing
|
|
.into_iter()
|
|
.collect::<HashSet<_>>()
|
|
.into_iter()
|
|
.collect();
|
|
unique_services.sort();
|
|
|
|
debug!("Auto-detected services: {:?}", unique_services);
|
|
unique_services
|
|
}
|
|
|
|
async fn discover_running_services() -> Result<Vec<String>, CollectorError> {
|
|
let output = Command::new("/run/current-system/sw/bin/systemctl")
|
|
.args([
|
|
"list-units",
|
|
"--type=service",
|
|
"--state=active",
|
|
"--no-pager",
|
|
"--no-legend",
|
|
])
|
|
.stdout(Stdio::piped())
|
|
.stderr(Stdio::piped())
|
|
.output()
|
|
.await
|
|
.map_err(|e| CollectorError::CommandFailed {
|
|
command: "systemctl list-units".to_string(),
|
|
message: e.to_string(),
|
|
})?;
|
|
|
|
if !output.status.success() {
|
|
return Err(CollectorError::CommandFailed {
|
|
command: "systemctl list-units".to_string(),
|
|
message: String::from_utf8_lossy(&output.stderr).to_string(),
|
|
});
|
|
}
|
|
|
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
let mut services = Vec::new();
|
|
|
|
for line in stdout.lines() {
|
|
let parts: Vec<&str> = line.split_whitespace().collect();
|
|
if !parts.is_empty() {
|
|
let service_name = parts[0];
|
|
// Remove .service suffix if present
|
|
let clean_name = service_name
|
|
.strip_suffix(".service")
|
|
.unwrap_or(service_name);
|
|
|
|
// Only include services we're interested in monitoring
|
|
if Self::is_monitorable_service(clean_name) {
|
|
services.push(clean_name.to_string());
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(services)
|
|
}
|
|
|
|
/// Decides whether a service name is one we want to monitor.
///
/// A name is rejected outright if it contains any entry of the exclusion
/// list. Otherwise it is accepted when it contains one of the patterns of
/// interest, or when one of those patterns contains the name (so a short
/// name such as `ssh` is accepted through the `sshd` pattern).
fn is_monitorable_service(service_name: &str) -> bool {
    // Setup/certificate helpers and specific pools that need no monitoring.
    const EXCLUDED: &[&str] = &[
        "mosquitto-certs",
        "immich-setup",
        "phpfpm-kryddorten",
        "phpfpm-mariehall2",
    ];

    const INTERESTING: &[&str] = &[
        // Web applications
        "gitea",
        "immich",
        "vaultwarden",
        "unifi",
        "wordpress",
        "nginx",
        "httpd",
        // Databases
        "postgresql",
        "mysql",
        "mariadb",
        "redis",
        "mongodb",
        "mongod",
        // Backup and storage
        "borg",
        "rclone",
        // Container runtimes
        "docker",
        // CI/CD services
        "gitea-actions",
        "gitea-runner",
        "actions-runner",
        // Network services
        "sshd",
        "dnsmasq",
        // MQTT and IoT services
        "mosquitto",
        "mqtt",
        // PHP-FPM services
        "phpfpm",
        // Home automation
        "haasp",
        // Backup services
        "backup",
    ];

    // Exclusions win over everything else.
    if EXCLUDED.iter().any(|skip| service_name.contains(*skip)) {
        return false;
    }

    // Substring match in either direction.
    INTERESTING
        .iter()
        .any(|wanted| service_name.contains(*wanted) || wanted.contains(service_name))
}
|
|
|
|
/// Extra services to monitor for a given hostname.
///
/// Discovery is fully automatic, so no host has hardcoded additions and the
/// result is always empty; the hostname is currently unused.
fn get_host_specific_services(_hostname: &str) -> Vec<String> {
    Vec::new()
}
|
|
|
|
/// Normalizes a service name.
///
/// Surrounding whitespace is removed; a blank name yields `None`.
/// Known aliases are matched case-insensitively and replaced by their
/// canonical form (`ssh`/`sshd` -> `sshd`, `docker.service` -> `docker`).
/// Any other name is returned trimmed but otherwise unchanged, including
/// its original letter case.
fn canonical_service_name(service: &str) -> Option<String> {
    let name = service.trim();
    if name.is_empty() {
        return None;
    }

    let canonical = match name.to_lowercase().as_str() {
        "ssh" | "sshd" => "sshd",
        "docker.service" => "docker",
        _ => name,
    };

    Some(canonical.to_string())
}
|
|
|
|
async fn filter_existing_services(services: &[String]) -> Vec<String> {
|
|
let mut existing = Vec::new();
|
|
|
|
for service in services {
|
|
if Self::service_exists(service).await {
|
|
existing.push(service.clone());
|
|
}
|
|
}
|
|
|
|
existing
|
|
}
|
|
|
|
async fn service_exists(service: &str) -> bool {
|
|
let unit = if service.ends_with(".service") {
|
|
service.to_string()
|
|
} else {
|
|
format!("{}.service", service)
|
|
};
|
|
|
|
match Command::new("/run/current-system/sw/bin/systemctl")
|
|
.args(["status", &unit])
|
|
.stdout(Stdio::null())
|
|
.stderr(Stdio::null())
|
|
.output()
|
|
.await
|
|
{
|
|
Ok(output) => output.status.success(),
|
|
Err(error) => {
|
|
warn!("Failed to check service {}: {}", unit, error);
|
|
false
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Auto-detect backup configuration
|
|
pub async fn discover_backup_config(hostname: &str) -> (bool, Option<String>, String) {
|
|
// Check if this host should have backup monitoring
|
|
let backup_enabled = hostname == "srv01" || Self::has_backup_service().await;
|
|
|
|
// Try to find restic repository
|
|
let restic_repo = if backup_enabled {
|
|
Self::discover_restic_repo().await
|
|
} else {
|
|
None
|
|
};
|
|
|
|
// Determine backup service name
|
|
let backup_service = Self::discover_backup_service()
|
|
.await
|
|
.unwrap_or_else(|| "restic-backup".to_string());
|
|
|
|
(backup_enabled, restic_repo, backup_service)
|
|
}
|
|
|
|
async fn has_backup_service() -> bool {
|
|
// Check for common backup services
|
|
let backup_services = ["restic", "borg", "duplicati", "rclone"];
|
|
|
|
for service in backup_services {
|
|
if let Ok(output) = Command::new("/run/current-system/sw/bin/systemctl")
|
|
.args(["is-enabled", service])
|
|
.output()
|
|
.await
|
|
{
|
|
if output.status.success() {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
false
|
|
}
|
|
|
|
async fn discover_restic_repo() -> Option<String> {
|
|
// Common restic repository locations
|
|
let common_paths = [
|
|
"/srv/backups/restic",
|
|
"/var/backups/restic",
|
|
"/home/restic",
|
|
"/backup/restic",
|
|
"/mnt/backup/restic",
|
|
];
|
|
|
|
for path in common_paths {
|
|
if fs::metadata(path).await.is_ok() {
|
|
debug!("Found restic repository at: {}", path);
|
|
return Some(path.to_string());
|
|
}
|
|
}
|
|
|
|
// Try to find via environment variables or config files
|
|
if let Ok(content) = fs::read_to_string("/etc/restic/repository").await {
|
|
let repo_path = content.trim();
|
|
if !repo_path.is_empty() {
|
|
return Some(repo_path.to_string());
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
|
|
async fn discover_backup_service() -> Option<String> {
|
|
let backup_services = ["restic-backup", "restic", "borg-backup", "borg", "backup"];
|
|
|
|
for service in backup_services {
|
|
if let Ok(output) = Command::new("/run/current-system/sw/bin/systemctl")
|
|
.args(["is-enabled", &format!("{}.service", service)])
|
|
.output()
|
|
.await
|
|
{
|
|
if output.status.success() {
|
|
return Some(service.to_string());
|
|
}
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
|
|
/// Validate auto-detected configuration
|
|
pub async fn validate_devices(devices: &[String]) -> Vec<String> {
|
|
let mut valid_devices = Vec::new();
|
|
|
|
for device in devices {
|
|
if Self::can_access_device(device).await {
|
|
valid_devices.push(device.clone());
|
|
} else {
|
|
warn!("Cannot access device {}, skipping", device);
|
|
}
|
|
}
|
|
|
|
valid_devices
|
|
}
|
|
|
|
async fn can_access_device(device: &str) -> bool {
|
|
let device_path = format!("/dev/{}", device);
|
|
|
|
// Try to run smartctl to see if device is accessible
|
|
if let Ok(output) = Command::new("sudo")
|
|
.args(["/run/current-system/sw/bin/smartctl", "-i", &device_path])
|
|
.stdout(Stdio::piped())
|
|
.stderr(Stdio::piped())
|
|
.output()
|
|
.await
|
|
{
|
|
// smartctl returns 0 for success, but may return other codes for warnings
|
|
// that are still acceptable (like device supports SMART but has some issues)
|
|
output.status.code().map_or(false, |code| code <= 4)
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
}
|