Implement comprehensive backup monitoring and fix timestamp issues
- Add BackupCollector for reading TOML status files with disk space metrics
- Implement BackupWidget with disk usage display and service status details
- Fix backup script disk space parsing by adding missing capture_output=True
- Update backup widget to show actual disk usage instead of repository size
- Fix timestamp parsing to use backup completion time instead of start time
- Resolve timezone issues by using UTC timestamps in backup script
- Add disk identification metrics (product name, serial number) to backup status
- Enhance UI layout with proper backup monitoring integration
This commit is contained in:
@@ -54,16 +54,33 @@ impl Agent {
|
||||
}
|
||||
|
||||
pub async fn run(&mut self, mut shutdown_rx: tokio::sync::oneshot::Receiver<()>) -> Result<()> {
|
||||
info!("Starting agent main loop");
|
||||
info!("Starting agent main loop with separated collection and transmission");
|
||||
|
||||
// CRITICAL: Collect ALL data immediately at startup before entering the loop
|
||||
info!("Performing initial FORCE collection of all metrics at startup");
|
||||
if let Err(e) = self.collect_all_metrics_force().await {
|
||||
error!("Failed to collect initial metrics: {}", e);
|
||||
} else {
|
||||
info!("Initial metric collection completed - all data cached and ready");
|
||||
}
|
||||
|
||||
// Separate intervals for collection and transmission
|
||||
let mut collection_interval = interval(Duration::from_secs(self.config.collection_interval_seconds));
|
||||
let mut transmission_interval = interval(Duration::from_secs(1)); // ZMQ broadcast every 1 second
|
||||
let mut notification_check_interval = interval(Duration::from_secs(30)); // Check notifications every 30s
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = collection_interval.tick() => {
|
||||
if let Err(e) = self.collect_and_publish_metrics().await {
|
||||
error!("Failed to collect and publish metrics: {}", e);
|
||||
// Only collect and cache metrics, no ZMQ transmission
|
||||
if let Err(e) = self.collect_metrics_only().await {
|
||||
error!("Failed to collect metrics: {}", e);
|
||||
}
|
||||
}
|
||||
_ = transmission_interval.tick() => {
|
||||
// Send all cached metrics via ZMQ every 1 second
|
||||
if let Err(e) = self.broadcast_all_cached_metrics().await {
|
||||
error!("Failed to broadcast cached metrics: {}", e);
|
||||
}
|
||||
}
|
||||
_ = notification_check_interval.tick() => {
|
||||
@@ -87,10 +104,29 @@ impl Agent {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn collect_and_publish_metrics(&mut self) -> Result<()> {
|
||||
debug!("Starting metric collection cycle");
|
||||
async fn collect_all_metrics_force(&mut self) -> Result<()> {
|
||||
info!("Starting FORCE metric collection for startup");
|
||||
|
||||
// Collect all metrics from all collectors
|
||||
// Force collect all metrics from all collectors immediately
|
||||
let metrics = self.metric_manager.collect_all_metrics_force().await?;
|
||||
|
||||
if metrics.is_empty() {
|
||||
error!("No metrics collected during force collection!");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!("Force collected and cached {} metrics", metrics.len());
|
||||
|
||||
// Check for status changes and send notifications
|
||||
self.check_status_changes(&metrics).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn collect_metrics_only(&mut self) -> Result<()> {
|
||||
debug!("Starting metric collection cycle (cache only)");
|
||||
|
||||
// Collect all metrics from all collectors and cache them
|
||||
let metrics = self.metric_manager.collect_all_metrics().await?;
|
||||
|
||||
if metrics.is_empty() {
|
||||
@@ -98,16 +134,32 @@ impl Agent {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!("Collected {} metrics", metrics.len());
|
||||
debug!("Collected and cached {} metrics", metrics.len());
|
||||
|
||||
// Check for status changes and send notifications
|
||||
self.check_status_changes(&metrics).await;
|
||||
|
||||
// Create and send message
|
||||
let message = MetricMessage::new(self.hostname.clone(), metrics);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn broadcast_all_cached_metrics(&mut self) -> Result<()> {
|
||||
debug!("Broadcasting all cached metrics via ZMQ");
|
||||
|
||||
// Get all cached metrics from the metric manager
|
||||
let cached_metrics = self.metric_manager.get_all_cached_metrics().await?;
|
||||
|
||||
if cached_metrics.is_empty() {
|
||||
debug!("No cached metrics to broadcast");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
debug!("Broadcasting {} cached metrics", cached_metrics.len());
|
||||
|
||||
// Create and send message with all cached data
|
||||
let message = MetricMessage::new(self.hostname.clone(), cached_metrics);
|
||||
self.zmq_handler.publish_metrics(&message).await?;
|
||||
|
||||
debug!("Metrics published successfully");
|
||||
debug!("Cached metrics broadcasted successfully");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -146,7 +198,7 @@ impl Agent {
|
||||
match command {
|
||||
AgentCommand::CollectNow => {
|
||||
info!("Processing CollectNow command");
|
||||
if let Err(e) = self.collect_and_publish_metrics().await {
|
||||
if let Err(e) = self.collect_metrics_only().await {
|
||||
error!("Failed to collect metrics on command: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user