Implement simple persistent cache with automatic saving on status changes

This commit is contained in:
Christoffer Martinsson 2025-10-21 20:12:19 +02:00
parent 338c4457a5
commit a08670071c
6 changed files with 116 additions and 104 deletions

View File

@ -179,7 +179,7 @@ impl Agent {
/// Feed every freshly collected metric through host-status processing.
///
/// The cache manager is passed along so a status change can trigger a
/// persistent cache save (see `HostStatusManager::process_metric`).
async fn process_metrics(&mut self, metrics: &[Metric]) {
    let cache_manager = self.metric_manager.get_cache_manager();
    for metric in metrics.iter() {
        self.host_status_manager
            .process_metric(metric, &mut self.notification_manager, cache_manager)
            .await;
    }
}

170
agent/src/cache/mod.rs vendored
View File

@ -1,101 +1,119 @@
use cm_dashboard_shared::{CacheConfig, Metric}; use cm_dashboard_shared::{CacheConfig, Metric};
use std::collections::HashMap; use std::collections::HashMap;
use std::time::Instant; use std::fs;
use std::path::Path;
use std::sync::Arc;
use tokio::sync::RwLock; use tokio::sync::RwLock;
use tracing::warn; use tracing::{info, warn, error};
mod cached_metric; /// Simple persistent cache for metrics
mod manager; pub struct SimpleCache {
metrics: RwLock<HashMap<String, Metric>>,
pub use cached_metric::CachedMetric; persist_path: String,
pub use manager::MetricCacheManager;
/// Central cache for individual metrics with configurable tiers
pub struct ConfigurableCache {
cache: RwLock<HashMap<String, CachedMetric>>,
config: CacheConfig,
} }
impl ConfigurableCache { impl SimpleCache {
pub fn new(config: CacheConfig) -> Self { pub fn new(config: CacheConfig) -> Self {
Self { let cache = Self {
cache: RwLock::new(HashMap::new()), metrics: RwLock::new(HashMap::new()),
config, persist_path: config.persist_path,
} };
// Load from disk on startup
cache.load_from_disk();
cache
} }
/// Store metric in cache /// Store metric in cache
pub async fn store_metric(&self, metric: Metric) { pub async fn store_metric(&self, metric: Metric) {
if !self.config.enabled { let mut metrics = self.metrics.write().await;
return; metrics.insert(metric.name.clone(), metric);
}
let mut cache = self.cache.write().await;
// Enforce max entries limit
if cache.len() >= self.config.max_entries {
self.cleanup_old_entries(&mut cache).await;
}
let cached_metric = CachedMetric {
metric: metric.clone(),
collected_at: Instant::now(),
access_count: 1,
};
cache.insert(metric.name.clone(), cached_metric);
// Cached metric (debug logging disabled for performance)
} }
/// Get all cached metrics (including expired ones) for broadcasting /// Get all cached metrics
pub async fn get_all_cached_metrics(&self) -> Vec<Metric> { pub async fn get_all_cached_metrics(&self) -> Vec<Metric> {
if !self.config.enabled { let metrics = self.metrics.read().await;
return vec![]; metrics.values().cloned().collect()
}
let cache = self.cache.read().await;
let mut all_metrics = Vec::new();
for cached_metric in cache.values() {
all_metrics.push(cached_metric.metric.clone());
}
all_metrics
} }
/// Background cleanup of old entries /// Save cache to disk
async fn cleanup_old_entries(&self, cache: &mut HashMap<String, CachedMetric>) { pub async fn save_to_disk(&self) {
let mut to_remove = Vec::new(); let metrics = self.metrics.read().await;
for (metric_name, cached_metric) in cache.iter() { // Create directory if needed
let cache_interval = self.config.default_ttl_seconds; if let Some(parent) = Path::new(&self.persist_path).parent() {
let elapsed = cached_metric.collected_at.elapsed().as_secs(); if let Err(e) = fs::create_dir_all(parent) {
warn!("Failed to create cache directory {}: {}", parent.display(), e);
// Remove entries that are way past their expiration (2x interval) return;
if elapsed > cache_interval * 2 {
to_remove.push(metric_name.clone());
} }
} }
for metric_name in to_remove { // Serialize and save
cache.remove(&metric_name); match serde_json::to_string_pretty(&*metrics) {
} Ok(json) => {
if let Err(e) = fs::write(&self.persist_path, json) {
// If still too many entries, remove least recently accessed error!("Failed to save cache to {}: {}", self.persist_path, e);
if cache.len() >= self.config.max_entries { }
let mut entries: Vec<_> = cache
.iter()
.map(|(k, v)| (k.clone(), v.access_count))
.collect();
entries.sort_by_key(|(_, access_count)| *access_count);
let excess = cache.len() - (self.config.max_entries * 3 / 4); // Remove 25%
for (metric_name, _) in entries.iter().take(excess) {
cache.remove(metric_name);
} }
Err(e) => {
error!("Failed to serialize cache: {}", e);
}
}
}
warn!("Cache cleanup removed {} entries due to size limit", excess); /// Load cache from disk
fn load_from_disk(&self) {
match fs::read_to_string(&self.persist_path) {
Ok(content) => {
match serde_json::from_str::<HashMap<String, Metric>>(&content) {
Ok(loaded_metrics) => {
if let Ok(mut metrics) = self.metrics.try_write() {
*metrics = loaded_metrics;
info!("Loaded {} metrics from cache", metrics.len());
}
}
Err(e) => {
warn!("Failed to parse cache file {}: {}", self.persist_path, e);
}
}
}
Err(_) => {
info!("No cache file found at {}, starting fresh", self.persist_path);
}
} }
} }
} }
/// Cheaply cloneable handle to the shared [`SimpleCache`].
#[derive(Clone)]
pub struct MetricCacheManager {
    // Cloning the manager only bumps the Arc refcount; all clones share
    // the same cache.
    cache: Arc<SimpleCache>,
}

impl MetricCacheManager {
    /// Build a manager around a freshly created cache.
    pub fn new(config: CacheConfig) -> Self {
        let cache = Arc::new(SimpleCache::new(config));
        Self { cache }
    }

    /// Insert or replace a metric in the cache.
    pub async fn store_metric(&self, metric: Metric) {
        self.cache.store_metric(metric).await;
    }

    /// Convenience alias for [`Self::store_metric`].
    pub async fn cache_metric(&self, metric: Metric) {
        self.store_metric(metric).await;
    }

    /// No-op: the simple cache requires no background maintenance.
    pub async fn start_background_tasks(&self) {}

    /// All cached metrics. The `Result` is kept for interface compatibility;
    /// this implementation cannot fail.
    pub async fn get_all_cached_metrics(&self) -> Result<Vec<Metric>, anyhow::Error> {
        let metrics = self.cache.get_all_cached_metrics().await;
        Ok(metrics)
    }

    /// Flush the current cache contents to disk.
    pub async fn save_to_disk(&self) {
        self.cache.save_to_disk().await;
    }
}

View File

@ -112,14 +112,8 @@ pub fn validate_config(config: &AgentConfig) -> Result<()> {
} }
// Validate cache configuration // Validate cache configuration
if config.cache.enabled { if config.cache.persist_path.is_empty() {
if config.cache.default_ttl_seconds == 0 { bail!("Cache persist path cannot be empty");
bail!("Cache TTL cannot be 0");
}
if config.cache.max_entries == 0 {
bail!("Cache max entries cannot be 0");
}
} }
Ok(()) Ok(())

View File

@ -240,7 +240,7 @@ impl MetricCollectionManager {
/// Get all cached metrics from the cache manager /// Get all cached metrics from the cache manager
pub async fn get_all_cached_metrics(&self) -> Result<Vec<Metric>> { pub async fn get_all_cached_metrics(&self) -> Result<Vec<Metric>> {
let cached_metrics = self.cache_manager.get_all_cached_metrics().await; let cached_metrics = self.cache_manager.get_all_cached_metrics().await?;
debug!( debug!(
"Retrieved {} cached metrics for broadcast", "Retrieved {} cached metrics for broadcast",
cached_metrics.len() cached_metrics.len()
@ -248,4 +248,8 @@ impl MetricCollectionManager {
Ok(cached_metrics) Ok(cached_metrics)
} }
/// Borrow the cache manager so other components (e.g. host-status
/// processing) can trigger cache operations without owning the cache.
pub fn get_cache_manager(&self) -> &MetricCacheManager {
    &self.cache_manager
}
} }

View File

@ -9,7 +9,6 @@ use chrono::Utc;
/// Configuration for host status aggregation and notification batching.
pub struct HostStatusConfig {
    /// Master switch for host-status processing.
    pub enabled: bool,
    pub aggregation_method: String, // "worst_case"
    /// How often batched status-change notifications are sent.
    pub notification_interval_seconds: u64,
}
@ -18,7 +17,6 @@ impl Default for HostStatusConfig {
Self { Self {
enabled: true, enabled: true,
aggregation_method: "worst_case".to_string(), aggregation_method: "worst_case".to_string(),
update_interval_seconds: 5,
notification_interval_seconds: 30, notification_interval_seconds: 30,
} }
} }
@ -72,7 +70,7 @@ impl HostStatusManager {
/// Update the status of a specific service and recalculate host status /// Update the status of a specific service and recalculate host status
/// Updates real-time status and buffers changes for email notifications /// Updates real-time status and buffers changes for email notifications
pub fn update_service_status(&mut self, service: String, status: Status) { pub fn update_service_status(&mut self, service: String, status: Status, cache_manager: Option<&crate::cache::MetricCacheManager>) {
if !self.config.enabled { if !self.config.enabled {
return; return;
} }
@ -84,6 +82,14 @@ impl HostStatusManager {
return; return;
} }
// Save cache when status changes (clone cache manager reference for async)
if let Some(cache) = cache_manager {
let cache = cache.clone();
tokio::spawn(async move {
cache.save_to_disk().await;
});
}
// Initialize batch if this is the first change // Initialize batch if this is the first change
if self.batch_start_time.is_none() { if self.batch_start_time.is_none() {
self.batch_start_time = Some(Instant::now()); self.batch_start_time = Some(Instant::now());
@ -163,9 +169,9 @@ impl HostStatusManager {
/// Process a metric - updates status (notifications handled separately via batching) /// Process a metric - updates status (notifications handled separately via batching)
/// Process a metric: updates the service status only. Notifications are
/// batched and sent later via `process_pending_notifications`, which is why
/// the notification manager argument is unused here.
pub async fn process_metric(&mut self, metric: &Metric, _notification_manager: &mut crate::notifications::NotificationManager, cache_manager: &crate::cache::MetricCacheManager) {
    // Passing the cache manager lets a status change persist the cache.
    self.update_service_status(metric.name.clone(), metric.status, Some(cache_manager));
}
/// Process pending notifications - call this at notification intervals /// Process pending notifications - call this at notification intervals

View File

@ -3,23 +3,13 @@ use serde::{Deserialize, Serialize};
/// Cache configuration
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CacheConfig {
    /// Path of the JSON file the metric cache is persisted to.
    pub persist_path: String,
}

impl Default for CacheConfig {
    /// Default persists under /var/lib, matching the agent's system install.
    fn default() -> Self {
        let persist_path = "/var/lib/cm-dashboard/cache.json".to_string();
        Self { persist_path }
    }
}