Testing

parent 3de1e0db19
commit 5e8a0ce108
@@ -405,225 +405,6 @@ impl ServiceCollector {
         }
     }
 
-    async fn add_system_metrics_as_services(&self, services: &mut Vec<ServiceData>, healthy: &mut usize, degraded: &mut usize, failed: &mut usize) {
-        // Get system metrics
-        if let Ok((load_1, _load_5, _load_15)) = self.get_cpu_load().await {
-            let cpu_status = self.determine_cpu_status(load_1);
-            match cpu_status.as_str() {
-                "ok" => *healthy += 1,
-                "warning" => *degraded += 1,
-                "critical" => *failed += 1,
-                _ => *degraded += 1,
-            }
-            services.push(ServiceData {
-                name: "CPU Load".to_string(),
-                status: match cpu_status.as_str() {
-                    "ok" => ServiceStatus::Running,
-                    "warning" => ServiceStatus::Degraded,
-                    "critical" => ServiceStatus::Stopped,
-                    _ => ServiceStatus::Degraded,
-                },
-                memory_used_mb: 0.0,
-                memory_quota_mb: 0.0,
-                cpu_percent: load_1,
-                sandbox_limit: None,
-                disk_used_gb: 0.0,
-                description: None,
-                sub_service: false,
-            });
-        }
-
-        if let Ok((memory_used_mb, memory_total_mb)) = self.get_memory_info().await {
-            let memory_status = self.determine_memory_status(memory_used_mb, memory_total_mb);
-            match memory_status.as_str() {
-                "ok" => *healthy += 1,
-                "warning" => *degraded += 1,
-                "critical" => *failed += 1,
-                _ => *degraded += 1,
-            }
-            services.push(ServiceData {
-                name: "Service Memory".to_string(),
-                status: match memory_status.as_str() {
-                    "ok" => ServiceStatus::Running,
-                    "warning" => ServiceStatus::Degraded,
-                    "critical" => ServiceStatus::Stopped,
-                    _ => ServiceStatus::Degraded,
-                },
-                memory_used_mb,
-                memory_quota_mb: memory_total_mb,
-                cpu_percent: 0.0,
-                sandbox_limit: None,
-                disk_used_gb: 0.0,
-                description: None,
-                sub_service: false,
-            });
-        }
-
-        if let Some(cpu_temp) = self.get_cpu_temperature().await {
-            let temp_status = self.determine_cpu_temp_status(cpu_temp);
-            match temp_status.as_str() {
-                "ok" => *healthy += 1,
-                "warning" => *degraded += 1,
-                "critical" => *failed += 1,
-                _ => *degraded += 1,
-            }
-            services.push(ServiceData {
-                name: "CPU Temperature".to_string(),
-                status: match temp_status.as_str() {
-                    "ok" => ServiceStatus::Running,
-                    "warning" => ServiceStatus::Degraded,
-                    "critical" => ServiceStatus::Stopped,
-                    _ => ServiceStatus::Degraded,
-                },
-                memory_used_mb: 0.0,
-                memory_quota_mb: 0.0,
-                cpu_percent: cpu_temp,
-                sandbox_limit: None,
-                disk_used_gb: 0.0,
-                description: None,
-                sub_service: false,
-            });
-        }
-
-        if let Ok(disk_usage) = self.get_disk_usage().await {
-            services.push(ServiceData {
-                name: "Disk Usage".to_string(),
-                status: ServiceStatus::Running,
-                memory_used_mb: 0.0,
-                memory_quota_mb: 0.0,
-                cpu_percent: 0.0,
-                sandbox_limit: None,
-                disk_used_gb: disk_usage.used_gb,
-                description: None,
-                sub_service: false,
-            });
-            *healthy += 1;
-        }
-    }
-
-    async fn get_cpu_load(&self) -> Result<(f32, f32, f32), CollectorError> {
-        let output = tokio::process::Command::new("uptime")
-            .output()
-            .await
-            .map_err(|e| CollectorError::CommandFailed {
-                command: "uptime".to_string(),
-                message: e.to_string()
-            })?;
-
-        let uptime_str = String::from_utf8_lossy(&output.stdout);
-
-        if let Some(load_part) = uptime_str.split("load average:").nth(1) {
-            let load_str = load_part.trim();
-            let loads: Vec<&str> = load_str.split(", ").collect();
-            if loads.len() >= 3 {
-                let load_1 = loads[0].trim().replace(',', ".").parse::<f32>()
-                    .map_err(|_| CollectorError::ParseError { message: "Failed to parse 1min load".to_string() })?;
-                let load_5 = loads[1].trim().replace(',', ".").parse::<f32>()
-                    .map_err(|_| CollectorError::ParseError { message: "Failed to parse 5min load".to_string() })?;
-                let load_15 = loads[2].trim().replace(',', ".").parse::<f32>()
-                    .map_err(|_| CollectorError::ParseError { message: "Failed to parse 15min load".to_string() })?;
-
-                return Ok((load_1, load_5, load_15));
-            }
-        }
-
-        Err(CollectorError::ParseError { message: "Failed to parse load averages".to_string() })
-    }
-
-    async fn get_cpu_temperature(&self) -> Option<f32> {
-        for i in 0..10 {
-            let type_path = format!("/sys/class/thermal/thermal_zone{}/type", i);
-            let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", i);
-
-            if let (Ok(zone_type), Ok(temp_str)) = (
-                fs::read_to_string(&type_path).await,
-                fs::read_to_string(&temp_path).await,
-            ) {
-                let zone_type = zone_type.trim();
-                if let Ok(temp_millic) = temp_str.trim().parse::<f32>() {
-                    let temp_c = temp_millic / 1000.0;
-                    if temp_c > 20.0 && temp_c < 150.0 {
-                        if zone_type == "x86_pkg_temp" || zone_type.contains("coretemp") {
-                            return Some(temp_c);
-                        }
-                    }
-                }
-            }
-        }
-
-        for i in 0..10 {
-            let temp_path = format!("/sys/class/thermal/thermal_zone{}/temp", i);
-            if let Ok(temp_str) = fs::read_to_string(&temp_path).await {
-                if let Ok(temp_millic) = temp_str.trim().parse::<f32>() {
-                    let temp_c = temp_millic / 1000.0;
-                    if temp_c > 20.0 && temp_c < 150.0 {
-                        return Some(temp_c);
-                    }
-                }
-            }
-        }
-        None
-    }
-
-    async fn get_memory_info(&self) -> Result<(f32, f32), CollectorError> {
-        let meminfo = fs::read_to_string("/proc/meminfo")
-            .await
-            .map_err(|e| CollectorError::IoError { message: format!("Failed to read /proc/meminfo: {}", e) })?;
-
-        let mut mem_total = None;
-        let mut mem_available = None;
-
-        for line in meminfo.lines() {
-            if let Some(value_str) = line.strip_prefix("MemTotal:") {
-                if let Some(kb_str) = value_str.trim().split_whitespace().next() {
-                    mem_total = kb_str.parse::<f32>().ok().map(|kb| kb / 1024.0);
-                }
-            } else if let Some(value_str) = line.strip_prefix("MemAvailable:") {
-                if let Some(kb_str) = value_str.trim().split_whitespace().next() {
-                    mem_available = kb_str.parse::<f32>().ok().map(|kb| kb / 1024.0);
-                }
-            }
-        }
-
-        match (mem_total, mem_available) {
-            (Some(total), Some(available)) => {
-                let used = total - available;
-                Ok((used, total))
-            }
-            _ => Err(CollectorError::ParseError { message: "Failed to parse memory info".to_string() }),
-        }
-    }
-
-    fn determine_cpu_status(&self, load_1: f32) -> String {
-        if load_1 >= 8.0 {
-            "critical".to_string()
-        } else if load_1 >= 5.0 {
-            "warning".to_string()
-        } else {
-            "ok".to_string()
-        }
-    }
-
-    fn determine_memory_status(&self, used_mb: f32, total_mb: f32) -> String {
-        let usage_percent = (used_mb / total_mb) * 100.0;
-        if usage_percent >= 95.0 {
-            "critical".to_string()
-        } else if usage_percent >= 80.0 {
-            "warning".to_string()
-        } else {
-            "ok".to_string()
-        }
-    }
-
-    fn determine_cpu_temp_status(&self, temp_c: f32) -> String {
-        if temp_c >= 100.0 {
-            "critical".to_string()
-        } else if temp_c >= 100.0 {
-            "warning".to_string()
-        } else {
-            "ok".to_string()
-        }
-    }
-
     async fn get_service_description_with_cache(&self, service: &str) -> Option<Vec<String>> {
         // Check if we should update the cache (throttled)
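For reference, the load-average parsing that the deleted get_cpu_load method performed on `uptime` output can be factored into a small pure function and unit-tested. The sketch below is illustrative only: parse_load_averages is a hypothetical helper, not part of this codebase, and reading /proc/loadavg directly would avoid spawning a process at all.

// Hypothetical helper: extract the three load averages from `uptime` output
// such as "... load average: 0.52, 0.58, 0.59".
fn parse_load_averages(uptime_output: &str) -> Option<(f32, f32, f32)> {
    let tail = uptime_output.split("load average:").nth(1)?;
    let mut nums = tail
        .split(", ")
        .map(|s| s.trim().replace(',', ".").parse::<f32>().ok());
    Some((nums.next()??, nums.next()??, nums.next()??))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_standard_uptime_output() {
        let line = "10:01:37 up 1 day,  3:44,  2 users,  load average: 0.52, 0.58, 0.59";
        assert_eq!(parse_load_averages(line), Some((0.52, 0.58, 0.59)));
    }
}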
@@ -1012,9 +793,6 @@ impl Collector for ServiceCollector {
         let mut total_memory_quota = 0.0;
         let mut total_disk_used = 0.0;
 
-        // Add system metrics as service entries first
-        self.add_system_metrics_as_services(&mut services, &mut healthy, &mut degraded, &mut failed).await;
-
         // Collect data from all configured services
         for service in &self.services {
            match self.get_service_status(service).await {
@@ -336,7 +336,7 @@ impl App {
     fn check_host_timeouts(&mut self) {
         let now = Utc::now();
 
-        for (host_name, state) in self.host_states.iter_mut() {
+        for (_host_name, state) in self.host_states.iter_mut() {
             if let Some(last_success) = state.last_success {
                 let duration_since_last = now.signed_duration_since(last_success);
 
@@ -69,12 +69,42 @@ fn render_metrics(
         return;
     }
 
-    let mut services = metrics.services.clone();
-    services.sort_by(|a, b| {
+    // Sort services but preserve nginx + sites grouping
+    let mut services = Vec::new();
+    let mut regular_services = Vec::new();
+    let mut nginx_group = Vec::new();
+    let mut in_nginx_group = false;
+
+    for svc in metrics.services.iter() {
+        if svc.name == "nginx" {
+            // Start nginx group
+            in_nginx_group = true;
+            nginx_group.push(svc.clone());
+        } else if in_nginx_group && svc.sub_service {
+            // Add nginx site to group
+            nginx_group.push(svc.clone());
+        } else {
+            // End nginx group if we were in one
+            if in_nginx_group {
+                in_nginx_group = false;
+                services.append(&mut nginx_group);
+            }
+            regular_services.push(svc.clone());
+        }
+    }
+
+    // Add any remaining nginx group
+    if !nginx_group.is_empty() {
+        services.append(&mut nginx_group);
+    }
+
+    // Sort regular services and add them
+    regular_services.sort_by(|a, b| {
         status_weight(&a.status)
             .cmp(&status_weight(&b.status))
             .then_with(|| a.name.cmp(&b.name))
     });
+    services.append(&mut regular_services);
 
     for svc in services {
         let status_level = match svc.status {
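The intent of the new grouping pass above — keep the nginx row and its sub_service site rows adjacent while everything else is sorted — can be illustrated with a self-contained sketch. The Svc struct and group_nginx_first helper below are hypothetical simplifications of the real ServiceData handling, sorting by name only for brevity where the real code sorts by status_weight first.

// Hypothetical simplified model of the grouping pass in render_metrics.
#[derive(Clone, Debug, PartialEq)]
struct Svc {
    name: String,
    sub_service: bool,
}

fn group_nginx_first(input: &[Svc]) -> Vec<Svc> {
    let mut grouped = Vec::new();
    let mut regular = Vec::new();
    let mut nginx_group = Vec::new();
    let mut in_nginx_group = false;

    for svc in input {
        if svc.name == "nginx" {
            // An "nginx" entry opens a group.
            in_nginx_group = true;
            nginx_group.push(svc.clone());
        } else if in_nginx_group && svc.sub_service {
            // Sub-service rows (nginx sites) stay attached to the group.
            nginx_group.push(svc.clone());
        } else {
            // Any other entry closes an open group.
            if in_nginx_group {
                in_nginx_group = false;
                grouped.append(&mut nginx_group);
            }
            regular.push(svc.clone());
        }
    }
    if !nginx_group.is_empty() {
        grouped.append(&mut nginx_group);
    }
    // The real code sorts by status_weight, then name.
    regular.sort_by(|a, b| a.name.cmp(&b.name));
    grouped.append(&mut regular);
    grouped
}

With an input order of postgres, nginx, site-a (sub_service), site-b (sub_service), redis, this keeps nginx, site-a, and site-b together at the front and appends postgres and redis after them in sorted order, which mirrors how the new render_metrics code places nginx groups ahead of the sorted regular services.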
@@ -166,9 +166,3 @@ fn format_optional_metric(value: Option<f32>, unit: &str) -> String {
     }
 }
-
-fn format_optional_percent(value: Option<f32>) -> String {
-    match value {
-        Some(number) => format!("{:.0}%", number),
-        None => "—".to_string(),
-    }
-}