Implement hysteresis for metric status changes to prevent flapping
Add comprehensive hysteresis support to prevent status oscillation near threshold boundaries while maintaining responsive alerting. Key Features: - HysteresisThresholds with configurable upper/lower limits - StatusTracker for per-metric status history - Default gaps: CPU load 10%, memory 5%, disk temp 5°C Updated Components: - CPU load collector (5-minute average with hysteresis) - Memory usage collector (percentage-based thresholds) - Disk temperature collector (SMART data monitoring) - All collectors updated to support StatusTracker interface Cache Interval Adjustments: - Service status: 60s → 10s (faster response) - Disk usage: 300s → 60s (more frequent checks) - Backup status: 900s → 60s (quicker updates) - SMART data: moved to 600s tier (10 minutes) Architecture: - Individual metric status calculation in collectors - Centralized StatusTracker in MetricCollectionManager - Status aggregation preserved in dashboard widgets
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
use anyhow::Result;
|
||||
use cm_dashboard_shared::Metric;
|
||||
use cm_dashboard_shared::{Metric, StatusTracker};
|
||||
use std::collections::HashMap;
|
||||
use std::time::Instant;
|
||||
use tracing::{debug, error, info};
|
||||
@@ -16,6 +16,7 @@ pub struct MetricCollectionManager {
|
||||
collectors: Vec<Box<dyn Collector>>,
|
||||
cache_manager: MetricCacheManager,
|
||||
last_collection_times: HashMap<String, Instant>,
|
||||
status_tracker: StatusTracker,
|
||||
}
|
||||
|
||||
impl MetricCollectionManager {
|
||||
@@ -117,6 +118,7 @@ impl MetricCollectionManager {
|
||||
collectors,
|
||||
cache_manager,
|
||||
last_collection_times: HashMap::new(),
|
||||
status_tracker: StatusTracker::new(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -134,7 +136,7 @@ impl MetricCollectionManager {
|
||||
for collector in &self.collectors {
|
||||
let collector_name = collector.name();
|
||||
|
||||
match collector.collect().await {
|
||||
match collector.collect(&mut self.status_tracker).await {
|
||||
Ok(metrics) => {
|
||||
info!(
|
||||
"Force collected {} metrics from {} collector",
|
||||
@@ -200,7 +202,7 @@ impl MetricCollectionManager {
|
||||
|
||||
if should_collect {
|
||||
collecting_fresh.insert(collector_name.to_string());
|
||||
match collector.collect().await {
|
||||
match collector.collect(&mut self.status_tracker).await {
|
||||
Ok(metrics) => {
|
||||
// Collector returned fresh metrics (debug logging disabled for performance)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user