Implement hysteresis for metric status changes to prevent flapping
Add comprehensive hysteresis support to prevent status oscillation near threshold boundaries while maintaining responsive alerting.

Key Features:
- HysteresisThresholds with configurable upper/lower limits
- StatusTracker for per-metric status history
- Default gaps: CPU load 10%, memory 5%, disk temp 5°C

Updated Components:
- CPU load collector (5-minute average with hysteresis)
- Memory usage collector (percentage-based thresholds)
- Disk temperature collector (SMART data monitoring)
- All collectors updated to support StatusTracker interface

Cache Interval Adjustments:
- Service status: 60s → 10s (faster response)
- Disk usage: 300s → 60s (more frequent checks)
- Backup status: 900s → 60s (quicker updates)
- SMART data: moved to 600s tier (10 minutes)

Architecture:
- Individual metric status calculation in collectors
- Centralized StatusTracker in MetricCollectionManager
- Status aggregation preserved in dashboard widgets
This commit is contained in:
@@ -1,16 +1,7 @@
|
||||
use async_trait::async_trait;
|
||||
use cm_dashboard_shared::Metric;
|
||||
use cm_dashboard_shared::{Metric, StatusTracker};
|
||||
use std::time::Duration;
|
||||
|
||||
pub mod cpu;
|
||||
pub mod memory;
|
||||
pub mod disk;
|
||||
pub mod systemd;
|
||||
pub mod backup;
|
||||
pub mod error;
|
||||
|
||||
pub use error::CollectorError;
|
||||
|
||||
/// Performance metrics for a collector
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PerformanceMetrics {
|
||||
@@ -18,69 +9,78 @@ pub struct PerformanceMetrics {
|
||||
pub collection_efficiency_percent: f32,
|
||||
}
|
||||
|
||||
pub mod backup;
|
||||
pub mod cpu;
|
||||
pub mod disk;
|
||||
pub mod error;
|
||||
pub mod memory;
|
||||
pub mod systemd;
|
||||
|
||||
pub use error::CollectorError;
|
||||
|
||||
|
||||
/// Base trait for all collectors with extreme efficiency requirements
///
/// Implementors must be `Send + Sync`. A collector exposes a name, an async
/// `collect` that yields a batch of `Metric`s or a `CollectorError`, and an
/// optional performance report whose default implementation returns `None`.
|
||||
#[async_trait]
|
||||
pub trait Collector: Send + Sync {
|
||||
/// Name of this collector
|
||||
fn name(&self) -> &str;
|
||||
|
||||
|
||||
/// Collect all metrics this collector provides
///
/// Returns the collected `Metric`s, or a `CollectorError` on failure.
// NOTE(review): two `collect` signatures appear below, without and with a
// `status_tracker: &mut StatusTracker` argument. They read as the before/after
// pair of this change; a trait can only declare one method named `collect`.
// NOTE(review): presumably `status_tracker` carries the per-metric status
// history used for hysteresis — confirm against `StatusTracker`.
|
||||
async fn collect(&self) -> Result<Vec<Metric>, CollectorError>;
|
||||
|
||||
async fn collect(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError>;
|
||||
|
||||
/// Get performance metrics for monitoring collector efficiency
///
/// The default implementation reports nothing (`None`); an implementor
/// may override it to return `Some(PerformanceMetrics)`.
|
||||
fn get_performance_metrics(&self) -> Option<PerformanceMetrics> {
|
||||
None
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// CPU efficiency rules for all collectors
|
||||
pub mod efficiency {
|
||||
/// CRITICAL: All collectors must follow these efficiency rules to minimize system impact
|
||||
|
||||
/// 1. FILE READING RULES
|
||||
/// - Read entire files in single syscall when possible
|
||||
/// - Use BufReader only for very large files (>4KB)
|
||||
/// - Never read files character by character
|
||||
/// - Cache file descriptors when safe (immutable paths)
|
||||
|
||||
/// 2. PARSING RULES
|
||||
/// - Use split() instead of regex for simple patterns
|
||||
/// - Parse numbers with from_str() not complex parsing
|
||||
/// - Avoid string allocations in hot paths
|
||||
/// - Use str::trim() before parsing numbers
|
||||
|
||||
/// 3. MEMORY ALLOCATION RULES
|
||||
/// - Reuse Vec buffers when possible
|
||||
/// - Pre-allocate collections with known sizes
|
||||
/// - Use str slices instead of String when possible
|
||||
/// - Avoid clone() in hot paths
|
||||
|
||||
/// 4. SYSTEM CALL RULES
|
||||
/// - Minimize syscalls - prefer single reads over multiple
|
||||
/// - Use /proc filesystem efficiently
|
||||
/// - Avoid spawning processes when /proc data available
|
||||
/// - Cache static data (like CPU count)
|
||||
|
||||
/// 5. ERROR HANDLING RULES
|
||||
/// - Use Result<> but minimize allocation in error paths
|
||||
/// - Log errors at debug level only to avoid I/O overhead
|
||||
/// - Graceful degradation - missing metrics better than failing
|
||||
/// - Never panic in collectors
|
||||
|
||||
/// 6. CONCURRENCY RULES
|
||||
/// - Collectors must be thread-safe but avoid locks
|
||||
/// - Use atomic operations for simple counters
|
||||
/// - Avoid shared mutable state between collections
|
||||
/// - Each collection should be independent
|
||||
|
||||
/// Performance target for collector overhead; the name indicates a percentage.
// NOTE(review): what the 0.1 is measured against (CPU time, wall time, ...) is
// not visible here — confirm before relying on it.
pub const PERFORMANCE_TARGET_OVERHEAD_PERCENT: f32 = 0.1;
|
||||
//! CRITICAL: All collectors must follow these efficiency rules to minimize system impact
|
||||
//!
|
||||
//! # FILE READING RULES
|
||||
//! - Read entire files in single syscall when possible
|
||||
//! - Use BufReader only for very large files (>4KB)
|
||||
//! - Never read files character by character
|
||||
//! - Cache file descriptors when safe (immutable paths)
|
||||
//!
|
||||
//! # PARSING RULES
|
||||
//! - Use split() instead of regex for simple patterns
|
||||
//! - Parse numbers with from_str() not complex parsing
|
||||
//! - Avoid string allocations in hot paths
|
||||
//! - Use str::trim() before parsing numbers
|
||||
//!
|
||||
//! # MEMORY ALLOCATION RULES
|
||||
//! - Reuse Vec buffers when possible
|
||||
//! - Pre-allocate collections with known sizes
|
||||
//! - Use str slices instead of String when possible
|
||||
//! - Avoid clone() in hot paths
|
||||
//!
|
||||
//! # SYSTEM CALL RULES
|
||||
//! - Minimize syscalls - prefer single reads over multiple
|
||||
//! - Use /proc filesystem efficiently
|
||||
//! - Avoid spawning processes when /proc data available
|
||||
//! - Cache static data (like CPU count)
|
||||
//!
|
||||
//! # ERROR HANDLING RULES
|
||||
//! - Use Result<> but minimize allocation in error paths
|
||||
//! - Log errors at debug level only to avoid I/O overhead
|
||||
//! - Graceful degradation - missing metrics better than failing
|
||||
//! - Never panic in collectors
|
||||
//!
|
||||
//! # CONCURRENCY RULES
|
||||
//! - Collectors must be thread-safe but avoid locks
|
||||
//! - Use atomic operations for simple counters
|
||||
//! - Avoid shared mutable state between collections
|
||||
//! - Each collection should be independent
|
||||
}
|
||||
|
||||
/// Utility functions for efficient system data collection
|
||||
pub mod utils {
|
||||
use std::fs;
|
||||
use super::CollectorError;
|
||||
|
||||
use std::fs;
|
||||
|
||||
/// Read entire file content efficiently
|
||||
pub fn read_proc_file(path: &str) -> Result<String, CollectorError> {
|
||||
fs::read_to_string(path).map_err(|e| CollectorError::SystemRead {
|
||||
@@ -88,25 +88,25 @@ pub mod utils {
|
||||
error: e.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
/// Parse float from string slice efficiently
///
/// Surrounding whitespace is trimmed before parsing. On failure the result is
/// `CollectorError::Parse`, carrying the original (untrimmed) input as `value`
/// and the parse error's message as `error`.
|
||||
pub fn parse_f32(s: &str) -> Result<f32, CollectorError> {
|
||||
// NOTE(review): the single-line chain and the multi-line chain below are the
// same expression in two layouts; only one can be the function's tail expression.
s.trim().parse().map_err(|e: std::num::ParseFloatError| CollectorError::Parse {
|
||||
value: s.to_string(),
|
||||
error: e.to_string(),
|
||||
})
|
||||
s.trim()
|
||||
.parse()
|
||||
.map_err(|e: std::num::ParseFloatError| CollectorError::Parse {
|
||||
value: s.to_string(),
|
||||
error: e.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
/// Parse integer from string slice efficiently
///
/// Surrounding whitespace is trimmed before parsing as `u64`. On failure the
/// result is `CollectorError::Parse`, carrying the original (untrimmed) input
/// as `value` and the parse error's message as `error`.
|
||||
pub fn parse_u64(s: &str) -> Result<u64, CollectorError> {
|
||||
// NOTE(review): the single-line chain and the multi-line chain below are the
// same expression in two layouts; only one can be the function's tail expression.
s.trim().parse().map_err(|e: std::num::ParseIntError| CollectorError::Parse {
|
||||
value: s.to_string(),
|
||||
error: e.to_string(),
|
||||
})
|
||||
s.trim()
|
||||
.parse()
|
||||
.map_err(|e: std::num::ParseIntError| CollectorError::Parse {
|
||||
value: s.to_string(),
|
||||
error: e.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Split string and get nth element safely
///
/// `n` is zero-based. Returns `None` when splitting `s` on `delimiter` yields
/// fewer than `n + 1` pieces. The returned slice borrows from `s`.
|
||||
pub fn split_nth<'a>(s: &'a str, delimiter: char, n: usize) -> Option<&'a str> {
|
||||
s.split(delimiter).nth(n)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user