Implement hysteresis for metric status changes to prevent flapping
Add comprehensive hysteresis support to prevent status oscillation near threshold boundaries while maintaining responsive alerting.

Key Features:
- HysteresisThresholds with configurable upper/lower limits
- StatusTracker for per-metric status history
- Default gaps: CPU load 10%, memory 5%, disk temp 5°C

Updated Components:
- CPU load collector (5-minute average with hysteresis)
- Memory usage collector (percentage-based thresholds)
- Disk temperature collector (SMART data monitoring)
- All collectors updated to support StatusTracker interface

Cache Interval Adjustments:
- Service status: 60s → 10s (faster response)
- Disk usage: 300s → 60s (more frequent checks)
- Backup status: 900s → 60s (quicker updates)
- SMART data: moved to 600s tier (10 minutes)

Architecture:
- Individual metric status calculation in collectors
- Centralized StatusTracker in MetricCollectionManager
- Status aggregation preserved in dashboard widgets
This commit is contained in:
@@ -1,14 +1,14 @@
|
||||
use anyhow::Result;
|
||||
use clap::Parser;
|
||||
use tracing::{info, error};
|
||||
use tracing::{error, info};
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
mod agent;
|
||||
mod cache;
|
||||
mod config;
|
||||
mod communication;
|
||||
mod metrics;
|
||||
mod collectors;
|
||||
mod communication;
|
||||
mod config;
|
||||
mod metrics;
|
||||
mod notifications;
|
||||
mod utils;
|
||||
|
||||
@@ -22,7 +22,7 @@ struct Cli {
|
||||
/// Increase logging verbosity (-v, -vv)
|
||||
#[arg(short, long, action = clap::ArgAction::Count)]
|
||||
verbose: u8,
|
||||
|
||||
|
||||
/// Configuration file path
|
||||
#[arg(short, long)]
|
||||
config: Option<String>,
|
||||
@@ -31,32 +31,32 @@ struct Cli {
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
|
||||
// Setup logging
|
||||
let log_level = match cli.verbose {
|
||||
0 => "info",
|
||||
1 => "debug",
|
||||
1 => "debug",
|
||||
_ => "trace",
|
||||
};
|
||||
|
||||
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(EnvFilter::from_default_env().add_directive(log_level.parse()?))
|
||||
.init();
|
||||
|
||||
|
||||
info!("CM Dashboard Agent starting with individual metrics architecture...");
|
||||
|
||||
|
||||
// Create and run agent
|
||||
let mut agent = Agent::new(cli.config).await?;
|
||||
|
||||
|
||||
// Setup graceful shutdown channel
|
||||
let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel();
|
||||
|
||||
|
||||
let ctrl_c = async {
|
||||
tokio::signal::ctrl_c()
|
||||
.await
|
||||
.expect("failed to install Ctrl+C handler");
|
||||
};
|
||||
|
||||
|
||||
// Run agent with graceful shutdown
|
||||
tokio::select! {
|
||||
result = agent.run(shutdown_rx) => {
|
||||
@@ -72,7 +72,7 @@ async fn main() -> Result<()> {
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
info!("Agent shutdown complete");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user