use anyhow::Result;
use async_trait::async_trait;
use cm_dashboard_shared::{Metric, MetricValue, Status, StatusTracker, HysteresisThresholds};

use crate::config::DiskConfig;
use std::process::Command;
use std::time::Instant;
use std::collections::HashMap;
use tracing::debug;

use super::{Collector, CollectorError};

/// Storage collector: discovers physical drives and mergerfs pools,
/// then emits capacity, usage, temperature, wear, and health metrics.
pub struct DiskCollector {
    config: DiskConfig,
    temperature_thresholds: HysteresisThresholds,
}

/// A physical drive with its filesystems
#[derive(Debug, Clone)]
struct PhysicalDrive {
    device: String,               // e.g., "nvme0n1", "sda"
    filesystems: Vec<Filesystem>, // mounted filesystems on this drive
    temperature: Option<f32>,     // drive temperature
    wear_level: Option<f32>,      // SSD wear level
    health_status: String,        // SMART health
}

/// A mergerfs pool
#[derive(Debug, Clone)]
struct MergerfsPool {
    mount_point: String,           // e.g., "/srv/media"
    total_bytes: u64,              // pool total capacity
    used_bytes: u64,               // pool used space
    data_drives: Vec<DriveInfo>,   // data member drives
    parity_drives: Vec<DriveInfo>, // parity drives
}

/// Individual filesystem on a drive
#[derive(Debug, Clone)]
struct Filesystem {
    mount_point: String, // e.g., "/", "/boot"
    total_bytes: u64,    // filesystem capacity
    used_bytes: u64,     // filesystem used space
}

/// Drive information for pools
#[derive(Debug, Clone)]
struct DriveInfo {
    device: String,           // e.g., "sdb", "sdc"
    mount_point: String,      // e.g., "/mnt/disk1"
    temperature: Option<f32>, // drive temperature
    wear_level: Option<f32>,  // SSD wear level
    health_status: String,    // SMART health
}

/// Discovered storage topology
#[derive(Debug)]
struct StorageTopology {
    physical_drives: Vec<PhysicalDrive>,
    mergerfs_pools: Vec<MergerfsPool>,
}

impl DiskCollector {
    pub fn new(config: DiskConfig) -> Self {
        let temperature_thresholds = HysteresisThresholds::with_custom_gaps(
            config.temperature_warning_celsius,
            5.0,
            config.temperature_critical_celsius,
            5.0,
        );

        Self {
            config,
            temperature_thresholds,
        }
    }

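    // Illustrative only: with temperature_warning_celsius = 50.0 and the 5.0°C
    // gap above, a drive flips to Warning at 50°C and (assuming the shared
    // HysteresisThresholds type clears at threshold minus gap) drops back to Ok
    // only below 45°C, which keeps the status from flapping around the limit.
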
    /// Discover all storage using a clean workflow: lsblk → df → group
    fn discover_storage(&self) -> Result<StorageTopology> {
        debug!("Starting storage discovery");

        // Step 1: Get all mount points and their backing devices using lsblk
        let mount_devices = self.get_mount_devices()?;
        debug!("Found {} mount points", mount_devices.len());

        // Step 2: Get filesystem usage for each mount point using df
        let filesystem_usage = self.get_filesystem_usage(&mount_devices)?;
        debug!("Got usage data for {} filesystems", filesystem_usage.len());

        // Step 3: Detect mergerfs pools from /proc/mounts
        let mergerfs_pools = self.discover_mergerfs_pools()?;
        debug!("Found {} mergerfs pools", mergerfs_pools.len());

        // Step 4: Group regular filesystems by physical drive
        let physical_drives =
            self.group_by_physical_drive(&mount_devices, &filesystem_usage, &mergerfs_pools)?;
        debug!("Grouped into {} physical drives", physical_drives.len());

        Ok(StorageTopology {
            physical_drives,
            mergerfs_pools,
        })
    }

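    // Example of the discovered shape on a typical host (illustrative values):
    // StorageTopology {
    //     physical_drives: [PhysicalDrive { device: "nvme0n1", filesystems: ["/", "/boot"], .. }],
    //     mergerfs_pools:  [MergerfsPool { mount_point: "/srv/media",
    //                                      data_drives:   ["/mnt/disk1", "/mnt/disk2"],
    //                                      parity_drives: ["/mnt/parity1"], .. }],
    // }
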
    /// Use lsblk to get mount points and their backing devices
    fn get_mount_devices(&self) -> Result<HashMap<String, String>> {
        let output = Command::new("lsblk")
            .args(&["-n", "-o", "NAME,MOUNTPOINT"])
            .output()?;

        if !output.status.success() {
            return Err(anyhow::anyhow!("lsblk command failed"));
        }

        let mut mount_devices = HashMap::new();
        let output_str = String::from_utf8_lossy(&output.stdout);

        for line in output_str.lines() {
            let parts: Vec<&str> = line.split_whitespace().collect();
            if parts.len() >= 2 {
                // Strip the tree-drawing glyphs lsblk prefixes to child devices
                let device_name = parts[0].trim_start_matches(&['├', '└', '─', '│', ' '][..]);
                let mount_point = parts[1];

                // Skip unwanted mount points
                if self.should_skip_mount_point(mount_point) {
                    continue;
                }

                mount_devices.insert(mount_point.to_string(), device_name.to_string());
            }
        }

        Ok(mount_devices)
    }

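    // With lsblk output resembling the following, the resulting map is
    // {"/boot" => "nvme0n1p1", "/" => "nvme0n1p2"}:
    //   nvme0n1
    //   ├─nvme0n1p1 /boot
    //   └─nvme0n1p2 /
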
    /// Check if we should skip this mount point
    fn should_skip_mount_point(&self, mount_point: &str) -> bool {
        let skip_prefixes = ["/proc", "/sys", "/dev", "/tmp", "/run"];
        skip_prefixes.iter().any(|prefix| mount_point.starts_with(prefix))
    }

    /// Use df to get filesystem usage for mount points
    fn get_filesystem_usage(
        &self,
        mount_devices: &HashMap<String, String>,
    ) -> Result<HashMap<String, (u64, u64)>> {
        let mut filesystem_usage = HashMap::new();

        for mount_point in mount_devices.keys() {
            match self.get_filesystem_info(mount_point) {
                Ok((total, used)) => {
                    filesystem_usage.insert(mount_point.clone(), (total, used));
                }
                Err(e) => {
                    debug!("Failed to get filesystem info for {}: {}", mount_point, e);
                }
            }
        }

        Ok(filesystem_usage)
    }

    /// Get filesystem info using df command
    fn get_filesystem_info(&self, path: &str) -> Result<(u64, u64)> {
        let output = Command::new("df")
            .arg("--block-size=1")
            .arg(path)
            .output()?;

        if !output.status.success() {
            return Err(anyhow::anyhow!("df command failed for {}", path));
        }

        let output_str = String::from_utf8(output.stdout)?;
        let lines: Vec<&str> = output_str.lines().collect();

        if lines.len() < 2 {
            return Err(anyhow::anyhow!("Unexpected df output format"));
        }

        let fields: Vec<&str> = lines[1].split_whitespace().collect();
        if fields.len() < 4 {
            return Err(anyhow::anyhow!("Unexpected df fields count"));
        }

        let total_bytes = fields[1].parse::<u64>()?;
        let used_bytes = fields[2].parse::<u64>()?;

        Ok((total_bytes, used_bytes))
    }

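    // `df --block-size=1 <path>` prints a header plus one data row, e.g.
    // (illustrative numbers):
    //   Filesystem       1B-blocks        Used   Available Use% Mounted on
    //   /dev/nvme0n1p2 502468108288 88234377216 388632698880  19% /
    // fields[1] is the total and fields[2] the used byte count parsed above.
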
    /// Discover mergerfs pools from /proc/mounts
    fn discover_mergerfs_pools(&self) -> Result<Vec<MergerfsPool>> {
        let mounts_content = std::fs::read_to_string("/proc/mounts")?;
        let mut pools = Vec::new();

        for line in mounts_content.lines() {
            let parts: Vec<&str> = line.split_whitespace().collect();
            if parts.len() >= 3 && parts[2] == "fuse.mergerfs" {
                let mount_point = parts[1].to_string();
                let device_sources = parts[0]; // e.g., "/mnt/disk1:/mnt/disk2"

                // Get pool usage
                let (total_bytes, used_bytes) = self
                    .get_filesystem_info(&mount_point)
                    .unwrap_or((0, 0));

                // Parse member paths - handle both full paths and numeric references
                let raw_paths: Vec<String> = device_sources
                    .split(':')
                    .map(|s| s.trim().to_string())
                    .filter(|s| !s.is_empty())
                    .collect();

                // Convert numeric references to actual mount points if needed
                let mut member_paths = if raw_paths.iter().any(|path| !path.starts_with('/')) {
                    // Handle numeric format like "1:2" by finding corresponding /mnt/disk* paths
                    self.resolve_numeric_mergerfs_paths(&raw_paths)?
                } else {
                    // Already full paths
                    raw_paths
                };

                // For SnapRAID setups, also include parity drives as part of the pool
                let snapraid_parity_paths = self.discover_snapraid_parity_drives()?;
                member_paths.extend(snapraid_parity_paths);

                // Categorize as data vs parity drives
                let (data_drives, parity_drives) = match self.categorize_pool_drives(&member_paths) {
                    Ok(drives) => drives,
                    Err(e) => {
                        debug!("Failed to categorize drives for pool {}: {}. Skipping.", mount_point, e);
                        continue;
                    }
                };

                pools.push(MergerfsPool {
                    mount_point,
                    total_bytes,
                    used_bytes,
                    data_drives,
                    parity_drives,
                });
            }
        }

        Ok(pools)
    }

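    // A matching /proc/mounts line looks roughly like this (fields: source,
    // mount point, fstype, options, dump, pass):
    //   /mnt/disk1:/mnt/disk2 /srv/media fuse.mergerfs rw,relatime,... 0 0
    // parts[0] carries the colon-separated branch list parsed above.
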
    /// Discover SnapRAID parity drives (heuristic: any mount whose path contains "parity")
    fn discover_snapraid_parity_drives(&self) -> Result<Vec<String>> {
        let mount_devices = self.get_mount_devices()?;
        let parity_paths: Vec<String> = mount_devices
            .keys()
            .filter(|path| path.contains("parity"))
            .cloned()
            .collect();
        Ok(parity_paths)
    }

    /// Categorize pool member drives as data vs parity
    fn categorize_pool_drives(
        &self,
        member_paths: &[String],
    ) -> Result<(Vec<DriveInfo>, Vec<DriveInfo>)> {
        let mut data_drives = Vec::new();
        let mut parity_drives = Vec::new();

        for path in member_paths {
            let drive_info = self.get_drive_info_for_path(path)?;

            // Heuristic: if path contains "parity", it's parity
            if path.to_lowercase().contains("parity") {
                parity_drives.push(drive_info);
            } else {
                data_drives.push(drive_info);
            }
        }

        Ok((data_drives, parity_drives))
    }

    /// Get drive information for a mount path
    fn get_drive_info_for_path(&self, path: &str) -> Result<DriveInfo> {
        // Use lsblk to find the backing device
        let output = Command::new("lsblk")
            .args(&["-n", "-o", "NAME,MOUNTPOINT"])
            .output()?;

        if !output.status.success() {
            return Err(anyhow::anyhow!("lsblk command failed"));
        }

        let output_str = String::from_utf8_lossy(&output.stdout);
        let mut device = String::new();

        for line in output_str.lines() {
            let parts: Vec<&str> = line.split_whitespace().collect();
            if parts.len() >= 2 && parts[1] == path {
                // Strip the same tree-drawing glyphs as in get_mount_devices
                device = parts[0]
                    .trim_start_matches(&['├', '└', '─', '│'][..])
                    .trim()
                    .to_string();
                break;
            }
        }

        if device.is_empty() {
            return Err(anyhow::anyhow!("Could not find device for path {}", path));
        }

        // Extract base device name (e.g., "sda1" -> "sda")
        let base_device = self.extract_base_device(&device);

        // Get SMART data
        let (health, temperature, wear) = self.get_smart_data(&format!("/dev/{}", base_device));

        Ok(DriveInfo {
            device: base_device,
            mount_point: path.to_string(),
            temperature,
            wear_level: wear,
            health_status: health,
        })
    }

    /// Resolve numeric mergerfs references like "1:2" to actual mount paths
    fn resolve_numeric_mergerfs_paths(&self, numeric_refs: &[String]) -> Result<Vec<String>> {
        let mut resolved_paths = Vec::new();

        // Get all mount points that look like /mnt/disk* or /mnt/parity*
        let mount_devices = self.get_mount_devices()?;
        let mut disk_mounts: Vec<String> = mount_devices
            .keys()
            .filter(|path| path.starts_with("/mnt/disk") || path.starts_with("/mnt/parity"))
            .cloned()
            .collect();
        disk_mounts.sort(); // Ensure consistent ordering

        for num_ref in numeric_refs {
            if let Ok(index) = num_ref.parse::<usize>() {
                // Convert 1-based index to 0-based
                if index > 0 && index <= disk_mounts.len() {
                    resolved_paths.push(disk_mounts[index - 1].clone());
                }
            }
        }

        // Fallback: if we couldn't resolve, return the original paths
        if resolved_paths.is_empty() {
            resolved_paths = numeric_refs.to_vec();
        }

        Ok(resolved_paths)
    }

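    // Worked example: with sorted candidates ["/mnt/disk1", "/mnt/disk2",
    // "/mnt/parity1"], the refs ["1", "2"] resolve to /mnt/disk1 and
    // /mnt/disk2 (1-based indexing into the sorted list).
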
    /// Extract base device name from partition (e.g., "nvme0n1p2" -> "nvme0n1", "sda1" -> "sda")
    fn extract_base_device(&self, device_name: &str) -> String {
        // Handle NVMe devices (nvme0n1p1 -> nvme0n1)
        if device_name.starts_with("nvme") {
            if let Some(p_pos) = device_name.find('p') {
                return device_name[..p_pos].to_string();
            }
            // No partition suffix; "nvme0n1" is already a base device, and the
            // digit-stripping below would mangle it into "nvme0n"
            return device_name.to_string();
        }

        // Handle traditional devices (sda1 -> sda)
        if device_name.len() > 1 {
            let chars: Vec<char> = device_name.chars().collect();
            let mut end_idx = chars.len();

            // Find where the device name ends and the partition number begins
            for (i, &c) in chars.iter().enumerate().rev() {
                if !c.is_ascii_digit() {
                    end_idx = i + 1;
                    break;
                }
            }

            if end_idx > 0 && end_idx < chars.len() {
                return chars[..end_idx].iter().collect();
            }
        }

        // If no partition detected, return as-is
        device_name.to_string()
    }

    /// Group filesystems by physical drive (excluding mergerfs members)
    fn group_by_physical_drive(
        &self,
        mount_devices: &HashMap<String, String>,
        filesystem_usage: &HashMap<String, (u64, u64)>,
        mergerfs_pools: &[MergerfsPool],
    ) -> Result<Vec<PhysicalDrive>> {
        let mut drive_groups: HashMap<String, Vec<Filesystem>> = HashMap::new();

        // Get all mergerfs member paths to exclude them
        let mut mergerfs_members = std::collections::HashSet::new();
        for pool in mergerfs_pools {
            for drive in &pool.data_drives {
                mergerfs_members.insert(drive.mount_point.clone());
            }
            for drive in &pool.parity_drives {
                mergerfs_members.insert(drive.mount_point.clone());
            }
        }

        // Group filesystems by base device
        for (mount_point, device) in mount_devices {
            // Skip mergerfs member mounts
            if mergerfs_members.contains(mount_point) {
                continue;
            }

            let base_device = self.extract_base_device(device);

            if let Some((total, used)) = filesystem_usage.get(mount_point) {
                let filesystem = Filesystem {
                    mount_point: mount_point.clone(),
                    total_bytes: *total,
                    used_bytes: *used,
                };

                drive_groups.entry(base_device).or_default().push(filesystem);
            }
        }

        // Convert to PhysicalDrive structs with SMART data
        let mut physical_drives = Vec::new();
        for (device, filesystems) in drive_groups {
            let (health, temperature, wear) = self.get_smart_data(&format!("/dev/{}", device));

            physical_drives.push(PhysicalDrive {
                device,
                filesystems,
                temperature,
                wear_level: wear,
                health_status: health,
            });
        }

        Ok(physical_drives)
    }

    /// Get SMART data for a drive: (health, temperature, wear level)
    fn get_smart_data(&self, device_path: &str) -> (String, Option<f32>, Option<f32>) {
        let output = Command::new("sudo")
            .arg("smartctl")
            .arg("-a")
            .arg(device_path)
            .output();

        match output {
            Ok(result) if result.status.success() => {
                let stdout = String::from_utf8_lossy(&result.stdout);

                // Parse health status
                let health = if stdout.contains("PASSED") {
                    "PASSED".to_string()
                } else if stdout.contains("FAILED") {
                    "FAILED".to_string()
                } else {
                    "UNKNOWN".to_string()
                };

                // Parse temperature and wear level
                let temperature = self.parse_temperature_from_smart(&stdout);
                let wear_level = self.parse_wear_level_from_smart(&stdout);

                (health, temperature, wear_level)
            }
            _ => {
                debug!("Failed to get SMART data for {}", device_path);
                ("UNKNOWN".to_string(), None, None)
            }
        }
    }

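    // The health line in `smartctl -a` output reads, for example:
    //   SMART overall-health self-assessment test result: PASSED
    // so a simple substring check on PASSED/FAILED is sufficient here.
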
    /// Parse temperature from SMART output (ATA attribute table or NVMe summary)
    fn parse_temperature_from_smart(&self, smart_output: &str) -> Option<f32> {
        for line in smart_output.lines() {
            // ATA attribute rows (e.g. Temperature_Celsius): RAW_VALUE is the 10th column
            if line.contains("Temperature") {
                let parts: Vec<&str> = line.split_whitespace().collect();
                if parts.len() >= 10 {
                    if let Ok(temp) = parts[9].parse::<f32>() {
                        return Some(temp);
                    }
                }
            }
            // NVMe summary form, e.g. "Temperature: 35 Celsius". Match
            // case-insensitively: smartctl capitalizes the label, so a plain
            // "temperature:" check would never fire.
            let lower = line.to_lowercase();
            if lower.contains("temperature:") {
                if let Some(temp_part) = lower.split("temperature:").nth(1) {
                    if let Some(temp_str) = temp_part.split_whitespace().next() {
                        if let Ok(temp) = temp_str.parse::<f32>() {
                            return Some(temp);
                        }
                    }
                }
            }
        }
        None
    }

    /// Parse wear level from SMART output
    fn parse_wear_level_from_smart(&self, smart_output: &str) -> Option<f32> {
        for line in smart_output.lines() {
            // NVMe reports wear directly, e.g. "Percentage Used: 3%"
            if line.contains("Percentage Used:") {
                if let Some(wear_part) = line.split("Percentage Used:").nth(1) {
                    if let Some(wear_str) = wear_part.split('%').next() {
                        if let Ok(wear) = wear_str.trim().parse::<f32>() {
                            return Some(wear);
                        }
                    }
                }
            }

            // ATA attributes report life *remaining* in the normalized VALUE
            // column (index 3), so convert to wear used
            let parts: Vec<&str> = line.split_whitespace().collect();
            if parts.len() >= 10 {
                if line.contains("SSD_Life_Left") || line.contains("Percent_Lifetime_Remain") {
                    if let Ok(remaining) = parts[3].parse::<f32>() {
                        return Some(100.0 - remaining);
                    }
                }
                if line.contains("Wear_Leveling_Count") {
                    if let Ok(wear_count) = parts[3].parse::<f32>() {
                        if wear_count <= 100.0 {
                            return Some(100.0 - wear_count);
                        }
                    }
                }
            }
        }
        None
    }

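    // Examples: an NVMe line "Percentage Used: 3%" yields Some(3.0); an ATA row
    // matching SSD_Life_Left with a normalized VALUE of 098 yields Some(2.0).
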
    /// Calculate temperature status with hysteresis
    fn calculate_temperature_status(
        &self,
        metric_name: &str,
        temperature: f32,
        status_tracker: &mut StatusTracker,
    ) -> Status {
        status_tracker.calculate_with_hysteresis(metric_name, temperature, &self.temperature_thresholds)
    }

    /// Convert bytes to human readable format
    fn bytes_to_human_readable(&self, bytes: u64) -> String {
        const UNITS: &[&str] = &["B", "K", "M", "G", "T"];
        let mut size = bytes as f64;
        let mut unit_index = 0;

        while size >= 1024.0 && unit_index < UNITS.len() - 1 {
            size /= 1024.0;
            unit_index += 1;
        }

        if unit_index == 0 {
            format!("{:.0}{}", size, UNITS[unit_index])
        } else {
            format!("{:.1}{}", size, UNITS[unit_index])
        }
    }

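    // Worked example: 1_610_612_736 bytes / 1024 = 1_572_864 K, / 1024 = 1536 M,
    // / 1024 = 1.5 G, so bytes_to_human_readable(1_610_612_736) returns "1.5G".
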
    /// Convert bytes to gigabytes
    fn bytes_to_gb(&self, bytes: u64) -> f32 {
        bytes as f32 / (1024.0 * 1024.0 * 1024.0)
    }
}

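// A minimal test sketch for the pure helpers above. It assumes DiskConfig
// implements (or derives) Default — a hypothetical here; adjust the
// construction to match the real config type if it does not.
#[cfg(test)]
mod helper_tests {
    use super::*;

    fn collector() -> DiskCollector {
        DiskCollector::new(DiskConfig::default())
    }

    #[test]
    fn base_device_is_extracted_from_partition_names() {
        let c = collector();
        assert_eq!(c.extract_base_device("nvme0n1p2"), "nvme0n1");
        assert_eq!(c.extract_base_device("nvme0n1"), "nvme0n1"); // no partition suffix
        assert_eq!(c.extract_base_device("sda1"), "sda");
        assert_eq!(c.extract_base_device("sdb"), "sdb");
    }

    #[test]
    fn virtual_mounts_are_skipped() {
        let c = collector();
        assert!(c.should_skip_mount_point("/proc/self"));
        assert!(c.should_skip_mount_point("/run/user/1000"));
        assert!(!c.should_skip_mount_point("/mnt/disk1"));
    }

    #[test]
    fn bytes_render_human_readable() {
        let c = collector();
        assert_eq!(c.bytes_to_human_readable(512), "512B");
        assert_eq!(c.bytes_to_human_readable(1_610_612_736), "1.5G");
    }
}
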
#[async_trait]
impl Collector for DiskCollector {
    async fn collect(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
        let start_time = Instant::now();
        debug!("Starting clean storage collection");

        let mut metrics = Vec::new();
        let timestamp = chrono::Utc::now().timestamp() as u64;

        // Discover storage topology; on failure, report no metrics rather than erroring out
        let topology = match self.discover_storage() {
            Ok(topology) => topology,
            Err(e) => {
                tracing::error!("Storage discovery failed: {}", e);
                return Ok(metrics);
            }
        };

        // Generate metrics for physical drives
        for drive in &topology.physical_drives {
            self.generate_physical_drive_metrics(&mut metrics, drive, timestamp, status_tracker);
        }

        // Generate metrics for mergerfs pools
        for pool in &topology.mergerfs_pools {
            self.generate_mergerfs_pool_metrics(&mut metrics, pool, timestamp, status_tracker);
        }

        // Add total storage count
        let total_storage = topology.physical_drives.len() + topology.mergerfs_pools.len();
        metrics.push(Metric {
            name: "disk_count".to_string(),
            value: MetricValue::Integer(total_storage as i64),
            unit: None,
            description: Some(format!(
                "Total storage: {} drives, {} pools",
                topology.physical_drives.len(),
                topology.mergerfs_pools.len()
            )),
            status: Status::Ok,
            timestamp,
        });

        let collection_time = start_time.elapsed();
        debug!("Clean storage collection completed in {:?} with {} metrics", collection_time, metrics.len());

        Ok(metrics)
    }
}

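// Metric naming scheme emitted below (derived from the code, not a formal spec):
//   disk_<device>_{health,temperature,wear_percent,total_gb,used_gb,available_gb,usage_percent,pool_type}
//   disk_<device>_fs_<mount>_{usage_percent,used_gb,total_gb,available_gb,mount_point}
//   disk_<pool>_{mount_point,pool_type,pool_health,total_gb,used_gb,available_gb,usage_percent}
//   disk_<pool>_<data_N|parity_N>_{health,temperature,wear_percent}
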
impl DiskCollector {
    /// Generate metrics for a physical drive and its filesystems
    fn generate_physical_drive_metrics(
        &self,
        metrics: &mut Vec<Metric>,
        drive: &PhysicalDrive,
        timestamp: u64,
        status_tracker: &mut StatusTracker,
    ) {
        let drive_name = &drive.device;

        // Calculate drive totals
        let total_capacity: u64 = drive.filesystems.iter().map(|fs| fs.total_bytes).sum();
        let total_used: u64 = drive.filesystems.iter().map(|fs| fs.used_bytes).sum();
        let total_available = total_capacity.saturating_sub(total_used);
        let usage_percent = if total_capacity > 0 {
            (total_used as f64 / total_capacity as f64) * 100.0
        } else {
            0.0
        };

        // Drive health status
        let health_status = if drive.health_status == "PASSED" {
            Status::Ok
        } else if drive.health_status == "FAILED" {
            Status::Critical
        } else {
            Status::Unknown
        };

        // Usage status
        let usage_status = if usage_percent >= self.config.usage_critical_percent as f64 {
            Status::Critical
        } else if usage_percent >= self.config.usage_warning_percent as f64 {
            Status::Warning
        } else {
            Status::Ok
        };

        let drive_status = if health_status == Status::Critical {
            Status::Critical
        } else {
            usage_status
        };

        // Drive info metrics
        metrics.push(Metric {
            name: format!("disk_{}_health", drive_name),
            value: MetricValue::String(drive.health_status.clone()),
            unit: None,
            description: Some(format!("{}: {}", drive_name, drive.health_status)),
            status: health_status,
            timestamp,
        });

        // Drive temperature
        if let Some(temp) = drive.temperature {
            let temp_status = self.calculate_temperature_status(
                &format!("disk_{}_temperature", drive_name),
                temp,
                status_tracker,
            );
            metrics.push(Metric {
                name: format!("disk_{}_temperature", drive_name),
                value: MetricValue::Float(temp),
                unit: Some("°C".to_string()),
                description: Some(format!("{}: {:.0}°C", drive_name, temp)),
                status: temp_status,
                timestamp,
            });
        }

        // Drive wear level (emitted whenever SMART reports it)
        if let Some(wear) = drive.wear_level {
            let wear_status = if wear >= self.config.wear_critical_percent {
                Status::Critical
            } else if wear >= self.config.wear_warning_percent {
                Status::Warning
            } else {
                Status::Ok
            };
            metrics.push(Metric {
                name: format!("disk_{}_wear_percent", drive_name),
                value: MetricValue::Float(wear),
                unit: Some("%".to_string()),
                description: Some(format!("{}: {:.0}% wear", drive_name, wear)),
                status: wear_status,
                timestamp,
            });
        }

        // Drive capacity metrics
        metrics.push(Metric {
            name: format!("disk_{}_total_gb", drive_name),
            value: MetricValue::Float(self.bytes_to_gb(total_capacity)),
            unit: Some("GB".to_string()),
            description: Some(format!("{}: {}", drive_name, self.bytes_to_human_readable(total_capacity))),
            status: Status::Ok,
            timestamp,
        });

        metrics.push(Metric {
            name: format!("disk_{}_used_gb", drive_name),
            value: MetricValue::Float(self.bytes_to_gb(total_used)),
            unit: Some("GB".to_string()),
            description: Some(format!("{}: {}", drive_name, self.bytes_to_human_readable(total_used))),
            status: drive_status.clone(),
            timestamp,
        });

        metrics.push(Metric {
            name: format!("disk_{}_available_gb", drive_name),
            value: MetricValue::Float(self.bytes_to_gb(total_available)),
            unit: Some("GB".to_string()),
            description: Some(format!("{}: {}", drive_name, self.bytes_to_human_readable(total_available))),
            status: Status::Ok,
            timestamp,
        });

        metrics.push(Metric {
            name: format!("disk_{}_usage_percent", drive_name),
            value: MetricValue::Float(usage_percent as f32),
            unit: Some("%".to_string()),
            description: Some(format!("{}: {:.1}%", drive_name, usage_percent)),
            status: drive_status,
            timestamp,
        });

        // Pool type indicator
        metrics.push(Metric {
            name: format!("disk_{}_pool_type", drive_name),
            value: MetricValue::String(format!("drive ({})", drive.filesystems.len())),
            unit: None,
            description: Some("Type: physical drive".to_string()),
            status: Status::Ok,
            timestamp,
        });

        // Individual filesystem metrics
        for filesystem in &drive.filesystems {
            let fs_name = if filesystem.mount_point == "/" {
                "root".to_string()
            } else {
                filesystem.mount_point.trim_start_matches('/').replace('/', "_")
            };

            let fs_usage_percent = if filesystem.total_bytes > 0 {
                (filesystem.used_bytes as f64 / filesystem.total_bytes as f64) * 100.0
            } else {
                0.0
            };

            let fs_status = if fs_usage_percent >= self.config.usage_critical_percent as f64 {
                Status::Critical
            } else if fs_usage_percent >= self.config.usage_warning_percent as f64 {
                Status::Warning
            } else {
                Status::Ok
            };

            metrics.push(Metric {
                name: format!("disk_{}_fs_{}_usage_percent", drive_name, fs_name),
                value: MetricValue::Float(fs_usage_percent as f32),
                unit: Some("%".to_string()),
                description: Some(format!("{}: {:.0}%", filesystem.mount_point, fs_usage_percent)),
                status: fs_status.clone(),
                timestamp,
            });

            metrics.push(Metric {
                name: format!("disk_{}_fs_{}_used_gb", drive_name, fs_name),
                value: MetricValue::Float(self.bytes_to_gb(filesystem.used_bytes)),
                unit: Some("GB".to_string()),
                description: Some(format!("{}: {}", filesystem.mount_point, self.bytes_to_human_readable(filesystem.used_bytes))),
                status: fs_status.clone(),
                timestamp,
            });

            metrics.push(Metric {
                name: format!("disk_{}_fs_{}_total_gb", drive_name, fs_name),
                value: MetricValue::Float(self.bytes_to_gb(filesystem.total_bytes)),
                unit: Some("GB".to_string()),
                description: Some(format!("{}: {}", filesystem.mount_point, self.bytes_to_human_readable(filesystem.total_bytes))),
                status: fs_status.clone(),
                timestamp,
            });

            let fs_available = filesystem.total_bytes.saturating_sub(filesystem.used_bytes);
            metrics.push(Metric {
                name: format!("disk_{}_fs_{}_available_gb", drive_name, fs_name),
                value: MetricValue::Float(self.bytes_to_gb(fs_available)),
                unit: Some("GB".to_string()),
                description: Some(format!("{}: {}", filesystem.mount_point, self.bytes_to_human_readable(fs_available))),
                status: Status::Ok,
                timestamp,
            });

            metrics.push(Metric {
                name: format!("disk_{}_fs_{}_mount_point", drive_name, fs_name),
                value: MetricValue::String(filesystem.mount_point.clone()),
                unit: None,
                description: Some(format!("Mount: {}", filesystem.mount_point)),
                status: Status::Ok,
                timestamp,
            });
        }
    }

    /// Generate metrics for a mergerfs pool
    fn generate_mergerfs_pool_metrics(
        &self,
        metrics: &mut Vec<Metric>,
        pool: &MergerfsPool,
        timestamp: u64,
        status_tracker: &mut StatusTracker,
    ) {
        // Use consistent pool naming: mount point without the leading slash
        let pool_name = if pool.mount_point == "/" {
            "root".to_string()
        } else {
            pool.mount_point.trim_start_matches('/').replace('/', "_")
        };

        if pool_name.is_empty() {
            return;
        }

        let usage_percent = if pool.total_bytes > 0 {
            (pool.used_bytes as f64 / pool.total_bytes as f64) * 100.0
        } else {
            0.0
        };

        // Calculate pool health from member drive health: a single failed
        // drive degrades the pool, more than one is critical
        let failed_data = pool.data_drives.iter()
            .filter(|d| d.health_status != "PASSED")
            .count();
        let failed_parity = pool.parity_drives.iter()
            .filter(|d| d.health_status != "PASSED")
            .count();

        let pool_health = match (failed_data, failed_parity) {
            (0, 0) => Status::Ok,
            (1, 0) | (0, 1) => Status::Warning,
            _ => Status::Critical,
        };

        let usage_status = if usage_percent >= self.config.usage_critical_percent as f64 {
            Status::Critical
        } else if usage_percent >= self.config.usage_warning_percent as f64 {
            Status::Warning
        } else {
            Status::Ok
        };

        let pool_status = if pool_health == Status::Critical {
            Status::Critical
        } else {
            usage_status
        };

        // Pool metrics
        metrics.push(Metric {
            name: format!("disk_{}_mount_point", pool_name),
            value: MetricValue::String(pool.mount_point.clone()),
            unit: None,
            description: Some(format!("Mount: {}", pool.mount_point)),
            status: Status::Ok,
            timestamp,
        });

        metrics.push(Metric {
            name: format!("disk_{}_pool_type", pool_name),
            value: MetricValue::String(format!("mergerfs ({}+{})", pool.data_drives.len(), pool.parity_drives.len())),
            unit: None,
            description: Some("Type: mergerfs".to_string()),
            status: Status::Ok,
            timestamp,
        });

        metrics.push(Metric {
            name: format!("disk_{}_pool_health", pool_name),
            value: MetricValue::String(match pool_health {
                Status::Ok => "healthy".to_string(),
                Status::Warning => "degraded".to_string(),
                Status::Critical => "critical".to_string(),
                _ => "unknown".to_string(),
            }),
            unit: None,
            description: Some("Pool health".to_string()),
            status: pool_health,
            timestamp,
        });

        metrics.push(Metric {
            name: format!("disk_{}_total_gb", pool_name),
            value: MetricValue::Float(self.bytes_to_gb(pool.total_bytes)),
            unit: Some("GB".to_string()),
            description: Some(format!("Total: {}", self.bytes_to_human_readable(pool.total_bytes))),
            status: Status::Ok,
            timestamp,
        });

        metrics.push(Metric {
            name: format!("disk_{}_used_gb", pool_name),
            value: MetricValue::Float(self.bytes_to_gb(pool.used_bytes)),
            unit: Some("GB".to_string()),
            description: Some(format!("Used: {}", self.bytes_to_human_readable(pool.used_bytes))),
            status: pool_status.clone(),
            timestamp,
        });

        let available_bytes = pool.total_bytes.saturating_sub(pool.used_bytes);
        metrics.push(Metric {
            name: format!("disk_{}_available_gb", pool_name),
            value: MetricValue::Float(self.bytes_to_gb(available_bytes)),
            unit: Some("GB".to_string()),
            description: Some(format!("Available: {}", self.bytes_to_human_readable(available_bytes))),
            status: Status::Ok,
            timestamp,
        });

        metrics.push(Metric {
            name: format!("disk_{}_usage_percent", pool_name),
            value: MetricValue::Float(usage_percent as f32),
            unit: Some("%".to_string()),
            description: Some(format!("Usage: {:.1}%", usage_percent)),
            status: pool_status,
            timestamp,
        });

        // Individual member drive metrics
        for (i, drive) in pool.data_drives.iter().enumerate() {
            self.generate_pool_drive_metrics(metrics, &pool_name, &format!("data_{}", i), drive, timestamp, status_tracker);
        }

        for (i, drive) in pool.parity_drives.iter().enumerate() {
            self.generate_pool_drive_metrics(metrics, &pool_name, &format!("parity_{}", i), drive, timestamp, status_tracker);
        }
    }

    /// Generate metrics for drives in mergerfs pools
    fn generate_pool_drive_metrics(
        &self,
        metrics: &mut Vec<Metric>,
        pool_name: &str,
        drive_role: &str,
        drive: &DriveInfo,
        timestamp: u64,
        status_tracker: &mut StatusTracker,
    ) {
        let drive_health = if drive.health_status == "PASSED" {
            Status::Ok
        } else if drive.health_status == "FAILED" {
            Status::Critical
        } else {
            Status::Unknown
        };

        metrics.push(Metric {
            name: format!("disk_{}_{}_health", pool_name, drive_role),
            value: MetricValue::String(drive.health_status.clone()),
            unit: None,
            description: Some(format!("{}: {}", drive.device, drive.health_status)),
            status: drive_health,
            timestamp,
        });

        if let Some(temp) = drive.temperature {
            let temp_status = self.calculate_temperature_status(
                &format!("disk_{}_{}_temperature", pool_name, drive_role),
                temp,
                status_tracker,
            );
            metrics.push(Metric {
                name: format!("disk_{}_{}_temperature", pool_name, drive_role),
                value: MetricValue::Float(temp),
                unit: Some("°C".to_string()),
                description: Some(format!("{}: {:.0}°C", drive.device, temp)),
                status: temp_status,
                timestamp,
            });
        }

        if let Some(wear) = drive.wear_level {
            let wear_status = if wear >= self.config.wear_critical_percent {
                Status::Critical
            } else if wear >= self.config.wear_warning_percent {
                Status::Warning
            } else {
                Status::Ok
            };
            metrics.push(Metric {
                name: format!("disk_{}_{}_wear_percent", pool_name, drive_role),
                value: MetricValue::Float(wear),
                unit: Some("%".to_string()),
                description: Some(format!("{}: {:.0}% wear", drive.device, wear)),
                status: wear_status,
                timestamp,
            });
        }
    }
}
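
// A second test sketch covering the SMART parsers, using smartctl-style sample
// lines. As in helper_tests above, DiskConfig::default() is an assumed
// construction, not a confirmed API.
#[cfg(test)]
mod smart_parse_tests {
    use super::*;

    fn collector() -> DiskCollector {
        DiskCollector::new(DiskConfig::default())
    }

    #[test]
    fn temperature_is_parsed_from_ata_and_nvme_output() {
        let c = collector();
        // ATA attribute row: RAW_VALUE is the 10th whitespace-separated field
        let ata = "194 Temperature_Celsius 0x0022 036 053 000 Old_age Always - 36";
        assert_eq!(c.parse_temperature_from_smart(ata), Some(36.0));
        // NVMe summary line, capitalized label
        let nvme = "Temperature:                        35 Celsius";
        assert_eq!(c.parse_temperature_from_smart(nvme), Some(35.0));
    }

    #[test]
    fn wear_is_parsed_as_percent_used() {
        let c = collector();
        // NVMe reports wear directly
        let nvme = "Percentage Used:                    3%";
        assert_eq!(c.parse_wear_level_from_smart(nvme), Some(3.0));
        // ATA reports life remaining in the normalized VALUE column (098 -> 2% used)
        let ata = "202 Percent_Lifetime_Remain 0x0030 098 098 001 Old_age Offline - 2";
        assert_eq!(c.parse_wear_level_from_smart(ata), Some(2.0));
    }
}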