Fix SMART data and site latency checking issues

- Add sudo to disk collector smartctl commands for proper SMART data access
- Add reqwest dependency with blocking feature for HTTP site checks
- Replace curl-based site latency with reqwest HTTP client implementation
- Maintain 2-second connect timeout and 5-second total timeout
- Fix disk health UNKNOWN status by enabling proper SMART permissions
- Fix nginx site timeout issues by using proper HTTP client with redirect support
This commit is contained in:
2025-10-18 19:14:29 +02:00
parent dcca5bbea3
commit 5d52c5b1aa
4 changed files with 552 additions and 33 deletions

View File

@@ -176,7 +176,8 @@ impl DiskCollector {
/// Get SMART health for a specific physical device
fn get_smart_health(&self, device: &str) -> (String, f32) {
if let Ok(output) = Command::new("smartctl")
if let Ok(output) = Command::new("sudo")
.arg("smartctl")
.arg("-H")
.arg(device)
.output()
@@ -192,7 +193,8 @@ impl DiskCollector {
};
// Try to get temperature
let temperature = if let Ok(temp_output) = Command::new("smartctl")
let temperature = if let Ok(temp_output) = Command::new("sudo")
.arg("smartctl")
.arg("-A")
.arg(device)
.output()

View File

@@ -861,36 +861,30 @@ impl SystemdCollector {
metrics
}
/// Check site latency using curl GET requests
/// Check site latency using HTTP GET requests
fn check_site_latency(&self, url: &str) -> Result<f32, Box<dyn std::error::Error>> {
let _start = std::time::Instant::now();
use std::time::Instant;
use std::time::Duration;
let output = Command::new("curl")
.arg("-X")
.arg("GET") // Explicitly use GET method
.arg("-s")
.arg("-o")
.arg("/dev/null")
.arg("-w")
.arg("%{time_total}")
.arg("--max-time")
.arg("5") // 5 second timeout
.arg("--connect-timeout")
.arg("2") // 2 second connection timeout
.arg("--location") // Follow redirects
.arg("--fail") // Fail on HTTP errors (4xx, 5xx)
.arg(url)
.output()?;
let start = Instant::now();
// Create HTTP client with timeouts (similar to legacy implementation)
let client = reqwest::blocking::Client::builder()
.timeout(Duration::from_secs(5))
.connect_timeout(Duration::from_secs(2))
.redirect(reqwest::redirect::Policy::limited(10))
.build()?;
if !output.status.success() {
return Err(format!("Curl GET request failed for {}", url).into());
// Make GET request and measure latency
let response = client.get(url).send()?;
let latency = start.elapsed().as_millis() as f32;
// Check if response is successful (2xx or 3xx status codes)
if response.status().is_success() || response.status().is_redirection() {
Ok(latency)
} else {
Err(format!("HTTP request failed for {} with status: {}", url, response.status()).into())
}
let time_str = String::from_utf8(output.stdout)?;
let time_seconds: f32 = time_str.trim().parse()?;
let time_ms = time_seconds * 1000.0;
Ok(time_ms)
}
/// Discover nginx sites from configuration files (like the old working implementation)