Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e8e50ef9bb | |||
| 0faed9309e | |||
| c980346d05 | |||
| 3e3d3f0c2b | |||
| 9eb7444d56 |
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard"
|
name = "cm-dashboard"
|
||||||
version = "0.1.58"
|
version = "0.1.63"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"chrono",
|
"chrono",
|
||||||
@@ -292,7 +292,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard-agent"
|
name = "cm-dashboard-agent"
|
||||||
version = "0.1.58"
|
version = "0.1.63"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
@@ -315,7 +315,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cm-dashboard-shared"
|
name = "cm-dashboard-shared"
|
||||||
version = "0.1.58"
|
version = "0.1.63"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"chrono",
|
"chrono",
|
||||||
"serde",
|
"serde",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard-agent"
|
name = "cm-dashboard-agent"
|
||||||
version = "0.1.59"
|
version = "0.1.64"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
@@ -78,10 +78,11 @@ impl Agent {
|
|||||||
info!("Initial metric collection completed - all data cached and ready");
|
info!("Initial metric collection completed - all data cached and ready");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Separate intervals for collection, transmission, and email notifications
|
// Separate intervals for collection, transmission, heartbeat, and email notifications
|
||||||
let mut collection_interval =
|
let mut collection_interval =
|
||||||
interval(Duration::from_secs(self.config.collection_interval_seconds));
|
interval(Duration::from_secs(self.config.collection_interval_seconds));
|
||||||
let mut transmission_interval = interval(Duration::from_secs(self.config.zmq.transmission_interval_seconds));
|
let mut transmission_interval = interval(Duration::from_secs(self.config.zmq.transmission_interval_seconds));
|
||||||
|
let mut heartbeat_interval = interval(Duration::from_secs(self.config.zmq.heartbeat_interval_seconds));
|
||||||
let mut notification_interval = interval(Duration::from_secs(self.config.notifications.aggregation_interval_seconds));
|
let mut notification_interval = interval(Duration::from_secs(self.config.notifications.aggregation_interval_seconds));
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
@@ -98,6 +99,12 @@ impl Agent {
|
|||||||
error!("Failed to broadcast metrics: {}", e);
|
error!("Failed to broadcast metrics: {}", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
_ = heartbeat_interval.tick() => {
|
||||||
|
// Send standalone heartbeat for host connectivity detection
|
||||||
|
if let Err(e) = self.send_heartbeat().await {
|
||||||
|
error!("Failed to send heartbeat: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
_ = notification_interval.tick() => {
|
_ = notification_interval.tick() => {
|
||||||
// Process batched email notifications (separate from dashboard updates)
|
// Process batched email notifications (separate from dashboard updates)
|
||||||
if let Err(e) = self.host_status_manager.process_pending_notifications(&mut self.notification_manager).await {
|
if let Err(e) = self.host_status_manager.process_pending_notifications(&mut self.notification_manager).await {
|
||||||
@@ -206,7 +213,7 @@ impl Agent {
|
|||||||
let mut status_changed = false;
|
let mut status_changed = false;
|
||||||
for metric in metrics {
|
for metric in metrics {
|
||||||
// Filter excluded metrics from email notification processing only
|
// Filter excluded metrics from email notification processing only
|
||||||
if self.config.exclude_email_metrics.contains(&metric.name) {
|
if self.config.notifications.exclude_email_metrics.contains(&metric.name) {
|
||||||
debug!("Excluding metric '{}' from email notification processing", metric.name);
|
debug!("Excluding metric '{}' from email notification processing", metric.name);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -252,6 +259,19 @@ impl Agent {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Send standalone heartbeat for connectivity detection
|
||||||
|
async fn send_heartbeat(&mut self) -> Result<()> {
|
||||||
|
let heartbeat_metric = self.get_heartbeat_metric();
|
||||||
|
let message = MetricMessage::new(
|
||||||
|
self.hostname.clone(),
|
||||||
|
vec![heartbeat_metric],
|
||||||
|
);
|
||||||
|
|
||||||
|
self.zmq_handler.publish_metrics(&message).await?;
|
||||||
|
debug!("Sent standalone heartbeat for connectivity detection");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
async fn handle_commands(&mut self) -> Result<()> {
|
async fn handle_commands(&mut self) -> Result<()> {
|
||||||
// Try to receive commands (non-blocking)
|
// Try to receive commands (non-blocking)
|
||||||
match self.zmq_handler.try_receive_command() {
|
match self.zmq_handler.try_receive_command() {
|
||||||
|
|||||||
@@ -17,9 +17,6 @@ pub struct AgentConfig {
|
|||||||
pub notifications: NotificationConfig,
|
pub notifications: NotificationConfig,
|
||||||
pub status_aggregation: HostStatusConfig,
|
pub status_aggregation: HostStatusConfig,
|
||||||
pub collection_interval_seconds: u64,
|
pub collection_interval_seconds: u64,
|
||||||
/// List of metric names to exclude from email notifications
|
|
||||||
#[serde(default)]
|
|
||||||
pub exclude_email_metrics: Vec<String>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// ZMQ communication configuration
|
/// ZMQ communication configuration
|
||||||
@@ -29,6 +26,9 @@ pub struct ZmqConfig {
|
|||||||
pub command_port: u16,
|
pub command_port: u16,
|
||||||
pub bind_address: String,
|
pub bind_address: String,
|
||||||
pub transmission_interval_seconds: u64,
|
pub transmission_interval_seconds: u64,
|
||||||
|
/// Heartbeat transmission interval in seconds for host connectivity detection
|
||||||
|
#[serde(default = "default_heartbeat_interval_seconds")]
|
||||||
|
pub heartbeat_interval_seconds: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Collector configuration
|
/// Collector configuration
|
||||||
@@ -147,9 +147,16 @@ pub struct NotificationConfig {
|
|||||||
pub rate_limit_minutes: u64,
|
pub rate_limit_minutes: u64,
|
||||||
/// Email notification batching interval in seconds (default: 60)
|
/// Email notification batching interval in seconds (default: 60)
|
||||||
pub aggregation_interval_seconds: u64,
|
pub aggregation_interval_seconds: u64,
|
||||||
|
/// List of metric names to exclude from email notifications
|
||||||
|
#[serde(default)]
|
||||||
|
pub exclude_email_metrics: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
fn default_heartbeat_interval_seconds() -> u64 {
|
||||||
|
5
|
||||||
|
}
|
||||||
|
|
||||||
impl AgentConfig {
|
impl AgentConfig {
|
||||||
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
||||||
loader::load_config(path)
|
loader::load_config(path)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard"
|
name = "cm-dashboard"
|
||||||
version = "0.1.59"
|
version = "0.1.64"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
@@ -149,6 +149,8 @@ impl Dashboard {
|
|||||||
|
|
||||||
let mut last_metrics_check = Instant::now();
|
let mut last_metrics_check = Instant::now();
|
||||||
let metrics_check_interval = Duration::from_millis(100); // Check for metrics every 100ms
|
let metrics_check_interval = Duration::from_millis(100); // Check for metrics every 100ms
|
||||||
|
let mut last_heartbeat_check = Instant::now();
|
||||||
|
let heartbeat_check_interval = Duration::from_secs(1); // Check for host connectivity every 1 second
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
// Handle terminal events (keyboard input) only if not headless
|
// Handle terminal events (keyboard input) only if not headless
|
||||||
@@ -254,14 +256,8 @@ impl Dashboard {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update TUI with new hosts and metrics (only if not headless)
|
// Update TUI with new metrics (only if not headless)
|
||||||
if let Some(ref mut tui_app) = self.tui_app {
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
let connected_hosts = self
|
|
||||||
.metric_store
|
|
||||||
.get_connected_hosts(Duration::from_secs(self.config.zmq.heartbeat_timeout_seconds));
|
|
||||||
|
|
||||||
|
|
||||||
tui_app.update_hosts(connected_hosts);
|
|
||||||
tui_app.update_metrics(&self.metric_store);
|
tui_app.update_metrics(&self.metric_store);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -280,6 +276,20 @@ impl Dashboard {
|
|||||||
last_metrics_check = Instant::now();
|
last_metrics_check = Instant::now();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for host connectivity changes (heartbeat timeouts) periodically
|
||||||
|
if last_heartbeat_check.elapsed() >= heartbeat_check_interval {
|
||||||
|
let timeout = Duration::from_secs(self.config.zmq.heartbeat_timeout_seconds);
|
||||||
|
|
||||||
|
// Clean up metrics for offline hosts
|
||||||
|
self.metric_store.cleanup_offline_hosts(timeout);
|
||||||
|
|
||||||
|
if let Some(ref mut tui_app) = self.tui_app {
|
||||||
|
let connected_hosts = self.metric_store.get_connected_hosts(timeout);
|
||||||
|
tui_app.update_hosts(connected_hosts);
|
||||||
|
}
|
||||||
|
last_heartbeat_check = Instant::now();
|
||||||
|
}
|
||||||
|
|
||||||
// Render TUI (only if not headless)
|
// Render TUI (only if not headless)
|
||||||
if !self.headless {
|
if !self.headless {
|
||||||
if let Some(ref mut terminal) = self.terminal {
|
if let Some(ref mut terminal) = self.terminal {
|
||||||
|
|||||||
@@ -141,9 +141,9 @@ impl ZmqConsumer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Receive metrics from any connected agent (with timeout)
|
/// Receive metrics from any connected agent (non-blocking)
|
||||||
pub async fn receive_metrics(&mut self) -> Result<Option<MetricMessage>> {
|
pub async fn receive_metrics(&mut self) -> Result<Option<MetricMessage>> {
|
||||||
match self.subscriber.recv_bytes(0) {
|
match self.subscriber.recv_bytes(zmq::DONTWAIT) {
|
||||||
Ok(data) => {
|
Ok(data) => {
|
||||||
debug!("Received {} bytes from ZMQ", data.len());
|
debug!("Received {} bytes from ZMQ", data.len());
|
||||||
|
|
||||||
|
|||||||
@@ -109,6 +109,28 @@ impl MetricStore {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Clean up data for offline hosts
|
||||||
|
pub fn cleanup_offline_hosts(&mut self, timeout: Duration) {
|
||||||
|
let now = Instant::now();
|
||||||
|
let mut hosts_to_cleanup = Vec::new();
|
||||||
|
|
||||||
|
// Find hosts that are offline (no recent heartbeat)
|
||||||
|
for (hostname, &last_heartbeat) in &self.last_heartbeat {
|
||||||
|
if now.duration_since(last_heartbeat) > timeout {
|
||||||
|
hosts_to_cleanup.push(hostname.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear metrics for offline hosts
|
||||||
|
for hostname in hosts_to_cleanup {
|
||||||
|
if let Some(metrics) = self.current_metrics.remove(&hostname) {
|
||||||
|
info!("Cleared {} metrics for offline host: {}", metrics.len(), hostname);
|
||||||
|
}
|
||||||
|
// Keep heartbeat timestamp for reconnection detection
|
||||||
|
// Don't remove from last_heartbeat to track when host was last seen
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Cleanup old data and enforce limits
|
/// Cleanup old data and enforce limits
|
||||||
fn cleanup_host_data(&mut self, hostname: &str) {
|
fn cleanup_host_data(&mut self, hostname: &str) {
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
|
|||||||
@@ -90,10 +90,13 @@ pub struct TuiApp {
|
|||||||
user_navigated_away: bool,
|
user_navigated_away: bool,
|
||||||
/// Dashboard configuration
|
/// Dashboard configuration
|
||||||
config: DashboardConfig,
|
config: DashboardConfig,
|
||||||
|
/// Cached localhost hostname to avoid repeated system calls
|
||||||
|
localhost: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TuiApp {
|
impl TuiApp {
|
||||||
pub fn new(config: DashboardConfig) -> Self {
|
pub fn new(config: DashboardConfig) -> Self {
|
||||||
|
let localhost = gethostname::gethostname().to_string_lossy().to_string();
|
||||||
let mut app = Self {
|
let mut app = Self {
|
||||||
host_widgets: HashMap::new(),
|
host_widgets: HashMap::new(),
|
||||||
current_host: None,
|
current_host: None,
|
||||||
@@ -102,6 +105,7 @@ impl TuiApp {
|
|||||||
should_quit: false,
|
should_quit: false,
|
||||||
user_navigated_away: false,
|
user_navigated_away: false,
|
||||||
config,
|
config,
|
||||||
|
localhost,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Sort predefined hosts
|
// Sort predefined hosts
|
||||||
@@ -207,13 +211,12 @@ impl TuiApp {
|
|||||||
self.available_hosts = all_hosts;
|
self.available_hosts = all_hosts;
|
||||||
|
|
||||||
// Get the current hostname (localhost) for auto-selection
|
// Get the current hostname (localhost) for auto-selection
|
||||||
let localhost = gethostname::gethostname().to_string_lossy().to_string();
|
|
||||||
if !self.available_hosts.is_empty() {
|
if !self.available_hosts.is_empty() {
|
||||||
if self.available_hosts.contains(&localhost) && !self.user_navigated_away {
|
if self.available_hosts.contains(&self.localhost) && !self.user_navigated_away {
|
||||||
// Localhost is available and user hasn't navigated away - switch to it
|
// Localhost is available and user hasn't navigated away - switch to it
|
||||||
self.current_host = Some(localhost.clone());
|
self.current_host = Some(self.localhost.clone());
|
||||||
// Find the actual index of localhost in the sorted list
|
// Find the actual index of localhost in the sorted list
|
||||||
self.host_index = self.available_hosts.iter().position(|h| h == &localhost).unwrap_or(0);
|
self.host_index = self.available_hosts.iter().position(|h| h == &self.localhost).unwrap_or(0);
|
||||||
} else if self.current_host.is_none() {
|
} else if self.current_host.is_none() {
|
||||||
// No current host - select first available (which is localhost if available)
|
// No current host - select first available (which is localhost if available)
|
||||||
self.current_host = Some(self.available_hosts[0].clone());
|
self.current_host = Some(self.available_hosts[0].clone());
|
||||||
@@ -410,9 +413,8 @@ impl TuiApp {
|
|||||||
self.current_host = Some(self.available_hosts[self.host_index].clone());
|
self.current_host = Some(self.available_hosts[self.host_index].clone());
|
||||||
|
|
||||||
// Check if user navigated away from localhost
|
// Check if user navigated away from localhost
|
||||||
let localhost = gethostname::gethostname().to_string_lossy().to_string();
|
|
||||||
if let Some(ref current) = self.current_host {
|
if let Some(ref current) = self.current_host {
|
||||||
if current != &localhost {
|
if current != &self.localhost {
|
||||||
self.user_navigated_away = true;
|
self.user_navigated_away = true;
|
||||||
} else {
|
} else {
|
||||||
self.user_navigated_away = false; // User navigated back to localhost
|
self.user_navigated_away = false; // User navigated back to localhost
|
||||||
@@ -556,6 +558,21 @@ impl TuiApp {
|
|||||||
])
|
])
|
||||||
.split(main_chunks[1]); // main_chunks[1] is now the content area (between title and statusbar)
|
.split(main_chunks[1]); // main_chunks[1] is now the content area (between title and statusbar)
|
||||||
|
|
||||||
|
// Check if current host is offline
|
||||||
|
let current_host_offline = if let Some(hostname) = self.current_host.clone() {
|
||||||
|
self.calculate_host_status(&hostname, metric_store) == Status::Offline
|
||||||
|
} else {
|
||||||
|
true // No host selected is considered offline
|
||||||
|
};
|
||||||
|
|
||||||
|
// If host is offline, render wake-up message instead of panels
|
||||||
|
if current_host_offline {
|
||||||
|
self.render_offline_host_message(frame, main_chunks[1]);
|
||||||
|
self.render_btop_title(frame, main_chunks[0], metric_store);
|
||||||
|
self.render_statusbar(frame, main_chunks[2]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Check if backup panel should be shown
|
// Check if backup panel should be shown
|
||||||
let show_backup = if let Some(hostname) = self.current_host.clone() {
|
let show_backup = if let Some(hostname) = self.current_host.clone() {
|
||||||
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
let host_widgets = self.get_or_create_host_widgets(&hostname);
|
||||||
@@ -809,6 +826,77 @@ impl TuiApp {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Render offline host message with wake-up option
|
||||||
|
fn render_offline_host_message(&self, frame: &mut Frame, area: Rect) {
|
||||||
|
use ratatui::layout::Alignment;
|
||||||
|
use ratatui::style::Modifier;
|
||||||
|
use ratatui::text::{Line, Span};
|
||||||
|
use ratatui::widgets::{Block, Borders, Paragraph};
|
||||||
|
|
||||||
|
// Get hostname for message
|
||||||
|
let hostname = self.current_host.as_ref()
|
||||||
|
.map(|h| h.as_str())
|
||||||
|
.unwrap_or("Unknown");
|
||||||
|
|
||||||
|
// Check if host has MAC address for wake-on-LAN
|
||||||
|
let has_mac = self.current_host.as_ref()
|
||||||
|
.and_then(|hostname| self.config.hosts.get(hostname))
|
||||||
|
.and_then(|details| details.mac_address.as_ref())
|
||||||
|
.is_some();
|
||||||
|
|
||||||
|
// Create message content
|
||||||
|
let mut lines = vec![
|
||||||
|
Line::from(Span::styled(
|
||||||
|
format!("Host '{}' is offline", hostname),
|
||||||
|
Style::default().fg(Theme::muted_text()).add_modifier(Modifier::BOLD),
|
||||||
|
)),
|
||||||
|
Line::from(""),
|
||||||
|
];
|
||||||
|
|
||||||
|
if has_mac {
|
||||||
|
lines.push(Line::from(Span::styled(
|
||||||
|
"Press 'w' to wake up host",
|
||||||
|
Style::default().fg(Theme::primary_text()).add_modifier(Modifier::BOLD),
|
||||||
|
)));
|
||||||
|
} else {
|
||||||
|
lines.push(Line::from(Span::styled(
|
||||||
|
"No MAC address configured - cannot wake up",
|
||||||
|
Style::default().fg(Theme::muted_text()),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create centered message
|
||||||
|
let message = Paragraph::new(lines)
|
||||||
|
.block(Block::default()
|
||||||
|
.borders(Borders::ALL)
|
||||||
|
.border_style(Style::default().fg(Theme::muted_text()))
|
||||||
|
.title(" Offline Host ")
|
||||||
|
.title_style(Style::default().fg(Theme::muted_text()).add_modifier(Modifier::BOLD)))
|
||||||
|
.style(Style::default().bg(Theme::background()).fg(Theme::primary_text()))
|
||||||
|
.alignment(Alignment::Center);
|
||||||
|
|
||||||
|
// Center the message in the available area
|
||||||
|
let popup_area = ratatui::layout::Layout::default()
|
||||||
|
.direction(Direction::Vertical)
|
||||||
|
.constraints([
|
||||||
|
Constraint::Percentage(40),
|
||||||
|
Constraint::Length(6),
|
||||||
|
Constraint::Percentage(40),
|
||||||
|
])
|
||||||
|
.split(area)[1];
|
||||||
|
|
||||||
|
let popup_area = ratatui::layout::Layout::default()
|
||||||
|
.direction(Direction::Horizontal)
|
||||||
|
.constraints([
|
||||||
|
Constraint::Percentage(25),
|
||||||
|
Constraint::Percentage(50),
|
||||||
|
Constraint::Percentage(25),
|
||||||
|
])
|
||||||
|
.split(popup_area)[1];
|
||||||
|
|
||||||
|
frame.render_widget(message, popup_area);
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse MAC address string (e.g., "AA:BB:CC:DD:EE:FF") to [u8; 6]
|
/// Parse MAC address string (e.g., "AA:BB:CC:DD:EE:FF") to [u8; 6]
|
||||||
fn parse_mac_address(mac_str: &str) -> Result<[u8; 6], &'static str> {
|
fn parse_mac_address(mac_str: &str) -> Result<[u8; 6], &'static str> {
|
||||||
let parts: Vec<&str> = mac_str.split(':').collect();
|
let parts: Vec<&str> = mac_str.split(':').collect();
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cm-dashboard-shared"
|
name = "cm-dashboard-shared"
|
||||||
version = "0.1.59"
|
version = "0.1.64"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
Reference in New Issue
Block a user