Compare commits

9 Commits

| Author | SHA1 | Date |
|---|---|---|
| | 85c6c624fb | |
| | eab3f17428 | |
| | 7ad149bbe4 | |
| | b444c88ea0 | |
| | 317cf76bd1 | |
| | 0db1a165b9 | |
| | 3c2955376d | |
| | f09ccabc7f | |
| | 43dd5a901a | |
CLAUDE.md (44)
@@ -156,7 +156,7 @@ Complete migration from string-based metrics to structured JSON data. Eliminates

- ✅ Backward compatibility via bridge conversion to existing UI widgets
- ✅ All string parsing bugs eliminated

### Cached Collector Architecture (✅ IMPLEMENTED)
### Cached Collector Architecture (🚧 PLANNED)

**Problem:** Blocking collectors prevent timely ZMQ transmission, causing false "host offline" alerts.

@@ -199,42 +199,12 @@ Every 1 second:

- ✅ System stays responsive even with slow operations
- ✅ Slow collectors can use longer timeouts without blocking

**Implementation Details:**
- **Shared cache**: `Arc<RwLock<AgentData>>` initialized at agent startup
- **Collector intervals**: Fully configurable via NixOS config (`interval_seconds` per collector)
  - Recommended: Fast (1-10s): CPU, Memory, Network
  - Recommended: Medium (30-60s): Backup, NixOS
  - Recommended: Slow (60-300s): Disk, Systemd
- **Independent tasks**: Each collector spawned as separate tokio task in `Agent::new()`
- **Cache updates**: Collectors acquire write lock → update → release immediately
- **ZMQ sender**: Main loop reads cache every `collection_interval_seconds` and broadcasts
- **Notification check**: Runs every `notifications.check_interval_seconds`
- **Lock strategy**: Short-lived write locks prevent blocking, read locks for transmission
- **Stale data**: Acceptable for slow-changing metrics (SMART data, disk usage)

**Configuration (NixOS):**
All intervals and timeouts configurable in `services/cm-dashboard.nix`:

Collection Intervals:
- `collectors.cpu.interval_seconds` (default: 10s)
- `collectors.memory.interval_seconds` (default: 2s)
- `collectors.disk.interval_seconds` (default: 300s)
- `collectors.systemd.interval_seconds` (default: 10s)
- `collectors.backup.interval_seconds` (default: 60s)
- `collectors.network.interval_seconds` (default: 10s)
- `collectors.nixos.interval_seconds` (default: 60s)
- `notifications.check_interval_seconds` (default: 30s)
- `collection_interval_seconds` - ZMQ transmission rate (default: 2s)

Command Timeouts (prevent resource leaks from hung commands):
- `collectors.disk.command_timeout_seconds` (default: 30s) - lsblk, smartctl, etc.
- `collectors.systemd.command_timeout_seconds` (default: 15s) - systemctl, docker, du
- `collectors.network.command_timeout_seconds` (default: 10s) - ip route, ip addr

**Code Locations:**
- agent/src/agent.rs:59-133 - Collector task spawning
- agent/src/agent.rs:151-179 - Independent collector task runner
- agent/src/agent.rs:199-207 - ZMQ sender in main loop

**Implementation:**
- Shared `AgentData` cache wrapped in `Arc<RwLock<>>`
- Each collector spawned as independent tokio task
- Collectors update their section of cache at their own rate
- ZMQ sender reads cache every 1s and transmits
- Stale data acceptable for slow-changing metrics (disk usage, SMART)

### Maintenance Mode
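The cached-collector design documented above (shared `Arc<RwLock<AgentData>>`, one tokio task per collector, short-lived write locks, a sender loop reading the cache at its own rate) can be sketched roughly as below. This is a minimal illustration, not the repository's code: the `AgentData` fields, the `Collector` trait shape, and the interval values are simplified stand-ins, and the real agent publishes over ZMQ rather than printing.

```rust
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::RwLock;
use tokio::time::interval;

// Stand-in for the real shared cache type; the repo's AgentData has many more fields.
#[derive(Clone, Default)]
struct AgentData {
    cpu_load_1min: f64,
}

// Stand-in for the repo's Collector trait (the real one is async and fallible).
trait Collector: Send + Sync + 'static {
    fn collect(&self, data: &mut AgentData);
}

struct CpuCollector;
impl Collector for CpuCollector {
    fn collect(&self, data: &mut AgentData) {
        data.cpu_load_1min = 0.42; // real code would read /proc/loadavg
    }
}

/// Spawn one independent task per collector; each takes a short-lived write lock.
fn spawn_collector(cache: Arc<RwLock<AgentData>>, collector: impl Collector, every: Duration) {
    tokio::spawn(async move {
        let mut tick = interval(every);
        loop {
            tick.tick().await;
            let mut data = cache.write().await; // held only for the update
            collector.collect(&mut *data);
        } // write lock released at end of each iteration
    });
}

#[tokio::main]
async fn main() {
    let cache = Arc::new(RwLock::new(AgentData::default()));
    spawn_collector(cache.clone(), CpuCollector, Duration::from_secs(10));

    // Sender loop: read the cache at a fixed rate and transmit (ZMQ in the real agent).
    let mut tick = interval(Duration::from_secs(2));
    loop {
        tick.tick().await;
        let snapshot = cache.read().await.clone();
        println!("would broadcast: load {}", snapshot.cpu_load_1min);
    }
}
```

Stale cache entries are acceptable here because slow-changing metrics (SMART data, disk usage) do not need to be fresher than their collector's interval.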
Cargo.lock (582, generated)
Cargo.lock changes (generated): the workspace crates cm-dashboard, cm-dashboard-agent, and cm-dashboard-shared are bumped from 0.1.194 to 0.1.200; new dependency trees are added for bollard 0.17.1 (Docker API, with bollard-stubs), rtnetlink 0.14.1 with netlink-packet-core/route/utils and netlink-proto, nix 0.27.1/0.29.0, futures 0.3.31, and libc, pulling in transitive crates such as hyper 1.8.1, http 1.4.0, http-body 1.0.1, hyper-util, hyperlocal, serde_with, serde_repr, schemars, time 0.3.44, and thiserror 2.0.17; and routine point-release updates land for cc (1.2.47), clap and clap_builder (4.5.53), hashbrown (0.16.1), signal-hook-registry (1.4.7), syn (2.0.111), tracing-attributes (0.1.31), tracing-core (0.1.35), winnow (0.7.14), zerocopy and zerocopy-derive (0.8.30).
Cargo.toml (package "cm-dashboard-agent")
@@ -1,6 +1,6 @@
[package]
name = "cm-dashboard-agent"
version = "0.1.195"
version = "0.1.200"
edition = "2021"

[dependencies]
@@ -20,4 +20,14 @@ gethostname = { workspace = true }
chrono-tz = "0.8"
toml = { workspace = true }
async-trait = "0.1"
reqwest = { version = "0.11", features = ["json", "blocking"] }
reqwest = { version = "0.11", features = ["json", "blocking"] }

# Native system APIs
nix = { version = "0.29", features = ["fs"] }
rtnetlink = "0.14"
netlink-packet-route = "0.19"
futures = "0.3"
libc = "0.2"

# Docker API client
bollard = "0.17"
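The newly added `bollard` dependency is a Docker API client. As a rough, hypothetical sketch of how a collector might use it to enumerate containers (this snippet is not taken from the repository; the printed fields are chosen for illustration only):

```rust
use bollard::container::ListContainersOptions;
use bollard::Docker;

#[tokio::main]
async fn main() -> Result<(), bollard::errors::Error> {
    // Connect via the local Docker socket (the default on Linux).
    let docker = Docker::connect_with_local_defaults()?;

    // List all containers, including stopped ones.
    let containers = docker
        .list_containers(Some(ListContainersOptions::<String> {
            all: true,
            ..Default::default()
        }))
        .await?;

    for c in containers {
        println!(
            "{} ({})",
            c.names.unwrap_or_default().join(", "),
            c.image.unwrap_or_default()
        );
    }
    Ok(())
}
```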
@@ -1,14 +1,13 @@
|
||||
use anyhow::Result;
|
||||
use gethostname::gethostname;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::time::interval;
|
||||
use tracing::{debug, error, info};
|
||||
|
||||
use crate::communication::{AgentCommand, ZmqHandler};
|
||||
use crate::communication::ZmqHandler;
|
||||
use crate::config::AgentConfig;
|
||||
use crate::collectors::{
|
||||
Collector,
|
||||
backup::BackupCollector,
|
||||
cpu::CpuCollector,
|
||||
disk::DiskCollector,
|
||||
@@ -24,7 +23,7 @@ pub struct Agent {
|
||||
hostname: String,
|
||||
config: AgentConfig,
|
||||
zmq_handler: ZmqHandler,
|
||||
cache: Arc<RwLock<AgentData>>,
|
||||
collectors: Vec<Box<dyn Collector>>,
|
||||
notification_manager: NotificationManager,
|
||||
previous_status: Option<SystemStatus>,
|
||||
}
|
||||
@@ -56,94 +55,39 @@ impl Agent {
|
||||
config.zmq.publisher_port
|
||||
);
|
||||
|
||||
// Initialize shared cache
|
||||
let cache = Arc::new(RwLock::new(AgentData::new(
|
||||
hostname.clone(),
|
||||
env!("CARGO_PKG_VERSION").to_string()
|
||||
)));
|
||||
info!("Initialized shared agent data cache");
|
||||
|
||||
// Spawn independent collector tasks
|
||||
let mut collector_count = 0;
|
||||
|
||||
// CPU collector
|
||||
// Initialize collectors
|
||||
let mut collectors: Vec<Box<dyn Collector>> = Vec::new();
|
||||
|
||||
// Add enabled collectors
|
||||
if config.collectors.cpu.enabled {
|
||||
let cache_clone = cache.clone();
|
||||
let collector = CpuCollector::new(config.collectors.cpu.clone());
|
||||
let interval = config.collectors.cpu.interval_seconds;
|
||||
tokio::spawn(async move {
|
||||
Self::run_collector_task(cache_clone, collector, Duration::from_secs(interval), "CPU").await;
|
||||
});
|
||||
collector_count += 1;
|
||||
collectors.push(Box::new(CpuCollector::new(config.collectors.cpu.clone())));
|
||||
}
|
||||
|
||||
// Memory collector
|
||||
|
||||
if config.collectors.memory.enabled {
|
||||
let cache_clone = cache.clone();
|
||||
let collector = MemoryCollector::new(config.collectors.memory.clone());
|
||||
let interval = config.collectors.memory.interval_seconds;
|
||||
tokio::spawn(async move {
|
||||
Self::run_collector_task(cache_clone, collector, Duration::from_secs(interval), "Memory").await;
|
||||
});
|
||||
collector_count += 1;
|
||||
collectors.push(Box::new(MemoryCollector::new(config.collectors.memory.clone())));
|
||||
}
|
||||
|
||||
// Network collector
|
||||
if config.collectors.network.enabled {
|
||||
let cache_clone = cache.clone();
|
||||
let collector = NetworkCollector::new(config.collectors.network.clone());
|
||||
let interval = config.collectors.network.interval_seconds;
|
||||
tokio::spawn(async move {
|
||||
Self::run_collector_task(cache_clone, collector, Duration::from_secs(interval), "Network").await;
|
||||
});
|
||||
collector_count += 1;
|
||||
}
|
||||
|
||||
// Backup collector
|
||||
if config.collectors.backup.enabled {
|
||||
let cache_clone = cache.clone();
|
||||
let collector = BackupCollector::new();
|
||||
let interval = config.collectors.backup.interval_seconds;
|
||||
tokio::spawn(async move {
|
||||
Self::run_collector_task(cache_clone, collector, Duration::from_secs(interval), "Backup").await;
|
||||
});
|
||||
collector_count += 1;
|
||||
}
|
||||
|
||||
// NixOS collector
|
||||
if config.collectors.nixos.enabled {
|
||||
let cache_clone = cache.clone();
|
||||
let collector = NixOSCollector::new(config.collectors.nixos.clone());
|
||||
let interval = config.collectors.nixos.interval_seconds;
|
||||
tokio::spawn(async move {
|
||||
Self::run_collector_task(cache_clone, collector, Duration::from_secs(interval), "NixOS").await;
|
||||
});
|
||||
collector_count += 1;
|
||||
}
|
||||
|
||||
// Disk collector
|
||||
|
||||
if config.collectors.disk.enabled {
|
||||
let cache_clone = cache.clone();
|
||||
let collector = DiskCollector::new(config.collectors.disk.clone());
|
||||
let interval = config.collectors.disk.interval_seconds;
|
||||
tokio::spawn(async move {
|
||||
Self::run_collector_task(cache_clone, collector, Duration::from_secs(interval), "Disk").await;
|
||||
});
|
||||
collector_count += 1;
|
||||
collectors.push(Box::new(DiskCollector::new(config.collectors.disk.clone())));
|
||||
}
|
||||
|
||||
// Systemd collector
|
||||
|
||||
if config.collectors.systemd.enabled {
|
||||
let cache_clone = cache.clone();
|
||||
let collector = SystemdCollector::new(config.collectors.systemd.clone());
|
||||
let interval = config.collectors.systemd.interval_seconds;
|
||||
tokio::spawn(async move {
|
||||
Self::run_collector_task(cache_clone, collector, Duration::from_secs(interval), "Systemd").await;
|
||||
});
|
||||
collector_count += 1;
|
||||
collectors.push(Box::new(SystemdCollector::new(config.collectors.systemd.clone())));
|
||||
}
|
||||
|
||||
if config.collectors.backup.enabled {
|
||||
collectors.push(Box::new(BackupCollector::new()));
|
||||
}
|
||||
|
||||
info!("Spawned {} independent collector tasks", collector_count);
|
||||
if config.collectors.network.enabled {
|
||||
collectors.push(Box::new(NetworkCollector::new(config.collectors.network.clone())));
|
||||
}
|
||||
|
||||
if config.collectors.nixos.enabled {
|
||||
collectors.push(Box::new(NixOSCollector::new(config.collectors.nixos.clone())));
|
||||
}
|
||||
|
||||
info!("Initialized {} collectors", collectors.len());
|
||||
|
||||
// Initialize notification manager
|
||||
let notification_manager = NotificationManager::new(&config.notifications, &hostname)?;
|
||||
@@ -153,124 +97,42 @@ impl Agent {
|
||||
hostname,
|
||||
config,
|
||||
zmq_handler,
|
||||
cache,
|
||||
collectors,
|
||||
notification_manager,
|
||||
previous_status: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Independent collector task runner
|
||||
async fn run_collector_task<C>(
|
||||
cache: Arc<RwLock<AgentData>>,
|
||||
collector: C,
|
||||
interval_duration: Duration,
|
||||
name: &str,
|
||||
) where
|
||||
C: crate::collectors::Collector + Send + 'static,
|
||||
{
|
||||
let mut interval_timer = interval(interval_duration);
|
||||
info!("{} collector task started (interval: {:?})", name, interval_duration);
|
||||
|
||||
loop {
|
||||
interval_timer.tick().await;
|
||||
|
||||
// Acquire write lock and update cache
|
||||
{
|
||||
let mut agent_data = cache.write().await;
|
||||
match collector.collect_structured(&mut *agent_data).await {
|
||||
Ok(_) => {
|
||||
debug!("{} collector updated cache", name);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("{} collector failed: {}", name, e);
|
||||
}
|
||||
}
|
||||
} // Release lock immediately after collection
|
||||
}
|
||||
}
|
||||
|
||||
/// Main agent loop with cached data architecture
|
||||
/// Main agent loop with structured data collection
|
||||
pub async fn run(&mut self, mut shutdown_rx: tokio::sync::oneshot::Receiver<()>) -> Result<()> {
|
||||
info!("Starting agent main loop with cached collector architecture");
|
||||
info!("Starting agent main loop");
|
||||
|
||||
// Spawn independent ZMQ sender task
|
||||
// Create dedicated ZMQ publisher for the sender task
|
||||
let cache_clone = self.cache.clone();
|
||||
let publisher_config = self.config.zmq.clone();
|
||||
let transmission_interval_secs = self.config.collection_interval_seconds;
|
||||
// Initial collection
|
||||
if let Err(e) = self.collect_and_broadcast().await {
|
||||
error!("Initial metric collection failed: {}", e);
|
||||
}
|
||||
|
||||
std::thread::spawn(move || {
|
||||
// Create ZMQ publisher in this thread (ZMQ sockets are not thread-safe)
|
||||
let context = zmq::Context::new();
|
||||
let publisher = context.socket(zmq::SocketType::PUB).unwrap();
|
||||
let bind_address = format!("tcp://{}:{}", publisher_config.bind_address, publisher_config.publisher_port);
|
||||
publisher.bind(&bind_address).unwrap();
|
||||
publisher.set_sndhwm(1000).unwrap();
|
||||
publisher.set_linger(1000).unwrap();
|
||||
info!("ZMQ sender task started on {} (interval: {}s)", bind_address, transmission_interval_secs);
|
||||
|
||||
let mut last_sent_data: Option<AgentData> = None;
|
||||
let interval_duration = std::time::Duration::from_secs(transmission_interval_secs);
|
||||
let mut next_send = std::time::Instant::now() + interval_duration;
|
||||
|
||||
loop {
|
||||
// Sleep until next send time
|
||||
std::thread::sleep(next_send.saturating_duration_since(std::time::Instant::now()));
|
||||
next_send = std::time::Instant::now() + interval_duration;
|
||||
|
||||
// Try to read cache without blocking - if locked, send last known data
|
||||
let data_to_send = match cache_clone.try_read() {
|
||||
Ok(agent_data) => {
|
||||
let data_clone = agent_data.clone();
|
||||
drop(agent_data); // Release lock immediately
|
||||
last_sent_data = Some(data_clone.clone());
|
||||
Some(data_clone)
|
||||
}
|
||||
Err(_) => {
|
||||
// Lock is held by collector - use last sent data
|
||||
debug!("Cache locked by collector, sending previous data");
|
||||
last_sent_data.clone()
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(data) = data_to_send {
|
||||
// Publish via ZMQ
|
||||
if let Ok(envelope) = cm_dashboard_shared::MessageEnvelope::agent_data(data) {
|
||||
if let Ok(serialized) = serde_json::to_vec(&envelope) {
|
||||
if let Err(e) = publisher.send(&serialized, 0) {
|
||||
error!("Failed to send ZMQ message: {}", e);
|
||||
} else {
|
||||
debug!("Successfully broadcast agent data");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Set up intervals for notifications and commands
|
||||
let mut notification_interval = interval(Duration::from_secs(
|
||||
self.config.notifications.check_interval_seconds,
|
||||
// Set up intervals
|
||||
let mut transmission_interval = interval(Duration::from_secs(
|
||||
self.config.collection_interval_seconds,
|
||||
));
|
||||
let mut command_interval = interval(Duration::from_millis(100));
|
||||
let mut notification_interval = interval(Duration::from_secs(30)); // Check notifications every 30s
|
||||
|
||||
// Skip initial ticks
|
||||
// Skip initial ticks to avoid immediate execution
|
||||
transmission_interval.tick().await;
|
||||
notification_interval.tick().await;
|
||||
command_interval.tick().await;
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = notification_interval.tick() => {
|
||||
// Read cache and check for status changes
|
||||
let agent_data = self.cache.read().await.clone();
|
||||
if let Err(e) = self.check_status_changes_and_notify(&agent_data).await {
|
||||
error!("Failed to check status changes: {}", e);
|
||||
_ = transmission_interval.tick() => {
|
||||
if let Err(e) = self.collect_and_broadcast().await {
|
||||
error!("Failed to collect and broadcast metrics: {}", e);
|
||||
}
|
||||
}
|
||||
_ = command_interval.tick() => {
|
||||
if let Err(e) = self.handle_commands().await {
|
||||
error!("Error handling commands: {}", e);
|
||||
}
|
||||
_ = notification_interval.tick() => {
|
||||
// Process any pending notifications
|
||||
// NOTE: With structured data, we might need to implement status tracking differently
|
||||
// For now, we skip this until status evaluation is migrated
|
||||
}
|
||||
_ = &mut shutdown_rx => {
|
||||
info!("Shutdown signal received, stopping agent loop");
|
||||
@@ -283,6 +145,35 @@ impl Agent {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Collect structured data from all collectors and broadcast via ZMQ
|
||||
async fn collect_and_broadcast(&mut self) -> Result<()> {
|
||||
debug!("Starting structured data collection");
|
||||
|
||||
// Initialize empty AgentData
|
||||
let mut agent_data = AgentData::new(self.hostname.clone(), env!("CARGO_PKG_VERSION").to_string());
|
||||
|
||||
// Collect data from all collectors
|
||||
for collector in &self.collectors {
|
||||
if let Err(e) = collector.collect_structured(&mut agent_data).await {
|
||||
error!("Collector failed: {}", e);
|
||||
// Continue with other collectors even if one fails
|
||||
}
|
||||
}
|
||||
|
||||
// Check for status changes and send notifications
|
||||
if let Err(e) = self.check_status_changes_and_notify(&agent_data).await {
|
||||
error!("Failed to check status changes: {}", e);
|
||||
}
|
||||
|
||||
// Broadcast the structured data via ZMQ
|
||||
if let Err(e) = self.zmq_handler.publish_agent_data(&agent_data).await {
|
||||
error!("Failed to broadcast agent data: {}", e);
|
||||
} else {
|
||||
debug!("Successfully broadcast structured agent data");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check for status changes and send notifications
|
||||
async fn check_status_changes_and_notify(&mut self, agent_data: &AgentData) -> Result<()> {
|
||||
@@ -362,36 +253,4 @@ impl Agent {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle incoming commands from dashboard
|
||||
async fn handle_commands(&mut self) -> Result<()> {
|
||||
// Try to receive a command (non-blocking)
|
||||
if let Ok(Some(command)) = self.zmq_handler.try_receive_command() {
|
||||
info!("Received command: {:?}", command);
|
||||
|
||||
match command {
|
||||
AgentCommand::CollectNow => {
|
||||
info!("Received immediate transmission request");
|
||||
// With cached architecture and dedicated ZMQ sender thread,
|
||||
// data is already being sent every interval
|
||||
// This command is acknowledged but not actionable in new architecture
|
||||
}
|
||||
AgentCommand::SetInterval { seconds } => {
|
||||
info!("Received interval change request: {}s", seconds);
|
||||
// Note: This would require more complex handling to update the interval
|
||||
// For now, just acknowledge
|
||||
}
|
||||
AgentCommand::ToggleCollector { name, enabled } => {
|
||||
info!("Received collector toggle request: {} -> {}", name, enabled);
|
||||
// Note: This would require more complex handling to enable/disable collectors
|
||||
// For now, just acknowledge
|
||||
}
|
||||
AgentCommand::Ping => {
|
||||
info!("Received ping command");
|
||||
// Maybe send back a pong or status
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,22 +1,25 @@
|
||||
use async_trait::async_trait;
|
||||
use cm_dashboard_shared::{AgentData, Status, HysteresisThresholds};
|
||||
use cm_dashboard_shared::{AgentData, Status, HysteresisThresholds, CpuData};
|
||||
use std::sync::RwLock;
|
||||
use std::time::Instant;
|
||||
|
||||
use tracing::debug;
|
||||
|
||||
use super::{utils, Collector, CollectorError};
|
||||
use crate::config::CpuConfig;
|
||||
|
||||
/// Extremely efficient CPU metrics collector
|
||||
///
|
||||
/// EFFICIENCY OPTIMIZATIONS:
|
||||
/// - Single /proc/loadavg read for all load metrics
|
||||
/// - Single /proc/stat read for CPU usage
|
||||
/// - Minimal string allocations
|
||||
/// - No process spawning
|
||||
/// - <0.1ms collection time target
|
||||
/// Extremely efficient CPU metrics collector with interval-based caching
|
||||
pub struct CpuCollector {
|
||||
load_thresholds: HysteresisThresholds,
|
||||
temperature_thresholds: HysteresisThresholds,
|
||||
config: CpuConfig,
|
||||
state: RwLock<CpuCacheState>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct CpuCacheState {
|
||||
last_collection: Option<Instant>,
|
||||
cached_data: CpuData,
|
||||
}
|
||||
|
||||
impl CpuCollector {
|
||||
@@ -26,15 +29,39 @@ impl CpuCollector {
|
||||
config.load_warning_threshold,
|
||||
config.load_critical_threshold,
|
||||
);
|
||||
|
||||
|
||||
let temperature_thresholds = HysteresisThresholds::new(
|
||||
config.temperature_warning_threshold,
|
||||
config.temperature_critical_threshold,
|
||||
);
|
||||
|
||||
|
||||
Self {
|
||||
load_thresholds,
|
||||
temperature_thresholds,
|
||||
config,
|
||||
state: RwLock::new(CpuCacheState {
|
||||
last_collection: None,
|
||||
cached_data: CpuData {
|
||||
load_1min: 0.0,
|
||||
load_5min: 0.0,
|
||||
load_15min: 0.0,
|
||||
frequency_mhz: 0.0,
|
||||
temperature_celsius: None,
|
||||
load_status: Status::Unknown,
|
||||
temperature_status: Status::Unknown,
|
||||
},
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn should_update_cache(&self) -> bool {
|
||||
let state = self.state.read().unwrap();
|
||||
match state.last_collection {
|
||||
None => true,
|
||||
Some(last) => {
|
||||
let cache_duration = std::time::Duration::from_secs(self.config.interval_seconds);
|
||||
last.elapsed() > cache_duration
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -156,6 +183,14 @@ impl CpuCollector {
|
||||
#[async_trait]
|
||||
impl Collector for CpuCollector {
|
||||
async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||
// Check if cache is valid
|
||||
if !self.should_update_cache() {
|
||||
let state = self.state.read().unwrap();
|
||||
agent_data.system.cpu = state.cached_data.clone();
|
||||
debug!("Using cached CPU data (interval: {}s)", self.config.interval_seconds);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
debug!("Collecting CPU metrics");
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
@@ -187,6 +222,11 @@ impl Collector for CpuCollector {
|
||||
Status::Unknown
|
||||
};
|
||||
|
||||
// Update cache
|
||||
let mut state = self.state.write().unwrap();
|
||||
state.last_collection = Some(Instant::now());
|
||||
state.cached_data = agent_data.system.cpu.clone();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ use crate::config::DiskConfig;
|
||||
use std::process::Command;
|
||||
use std::time::Instant;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::RwLock;
|
||||
use tracing::debug;
|
||||
|
||||
use super::{Collector, CollectorError};
|
||||
@@ -14,6 +15,19 @@ use super::{Collector, CollectorError};
|
||||
pub struct DiskCollector {
|
||||
config: DiskConfig,
|
||||
temperature_thresholds: HysteresisThresholds,
|
||||
/// Cached state with thread-safe interior mutability
|
||||
state: RwLock<DiskCacheState>,
|
||||
}
|
||||
|
||||
/// Internal state for disk caching
|
||||
#[derive(Debug, Clone)]
|
||||
struct DiskCacheState {
|
||||
/// Last collection time for performance tracking
|
||||
last_collection: Option<Instant>,
|
||||
/// Cached drive data
|
||||
cached_drives: Vec<DriveData>,
|
||||
/// Cached pool data
|
||||
cached_pools: Vec<PoolData>,
|
||||
}
|
||||
|
||||
/// A physical drive with its filesystems
|
||||
@@ -58,10 +72,17 @@ impl DiskCollector {
|
||||
config.temperature_warning_celsius,
|
||||
config.temperature_critical_celsius,
|
||||
);
|
||||
|
||||
|
||||
let state = DiskCacheState {
|
||||
last_collection: None,
|
||||
cached_drives: Vec::new(),
|
||||
cached_pools: Vec::new(),
|
||||
};
|
||||
|
||||
Self {
|
||||
config,
|
||||
temperature_thresholds,
|
||||
state: RwLock::new(state),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -104,40 +125,70 @@ impl DiskCollector {
|
||||
self.populate_drives_data(&physical_drives, &smart_data, agent_data)?;
|
||||
self.populate_pools_data(&mergerfs_pools, &smart_data, agent_data)?;
|
||||
|
||||
// Step 7: Update cache with fresh data
|
||||
{
|
||||
let mut state = self.state.write().unwrap();
|
||||
state.cached_drives = agent_data.system.storage.drives.clone();
|
||||
state.cached_pools = agent_data.system.storage.pools.clone();
|
||||
state.last_collection = Some(Instant::now());
|
||||
}
|
||||
|
||||
let elapsed = start_time.elapsed();
|
||||
debug!("Storage collection completed in {:?}", elapsed);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get block devices and their mount points using lsblk
|
||||
/// Check if disk collection cache should be updated
|
||||
fn should_update_cache(&self) -> bool {
|
||||
let state = self.state.read().unwrap();
|
||||
|
||||
match state.last_collection {
|
||||
None => true,
|
||||
Some(last) => {
|
||||
let cache_duration = std::time::Duration::from_secs(self.config.interval_seconds);
|
||||
last.elapsed() > cache_duration
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get cached disk data if available and fresh
|
||||
fn get_cached_data(&self) -> Option<(Vec<DriveData>, Vec<PoolData>)> {
|
||||
if !self.should_update_cache() {
|
||||
let state = self.state.read().unwrap();
|
||||
Some((state.cached_drives.clone(), state.cached_pools.clone()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Get block devices and their mount points by reading /proc/mounts
|
||||
async fn get_mount_devices(&self) -> Result<HashMap<String, String>, CollectorError> {
|
||||
use super::run_command_with_timeout;
|
||||
|
||||
let mut cmd = Command::new("lsblk");
|
||||
cmd.args(&["-rn", "-o", "NAME,MOUNTPOINT"]);
|
||||
|
||||
let output = run_command_with_timeout(cmd, self.config.command_timeout_seconds).await
|
||||
let content = std::fs::read_to_string("/proc/mounts")
|
||||
.map_err(|e| CollectorError::SystemRead {
|
||||
path: "block devices".to_string(),
|
||||
path: "/proc/mounts".to_string(),
|
||||
error: e.to_string(),
|
||||
})?;
|
||||
|
||||
let mut mount_devices = HashMap::new();
|
||||
for line in String::from_utf8_lossy(&output.stdout).lines() {
|
||||
|
||||
for line in content.lines() {
|
||||
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||
if parts.len() >= 2 {
|
||||
let device_name = parts[0];
|
||||
if parts.len() >= 3 {
|
||||
let device = parts[0];
|
||||
let mount_point = parts[1];
|
||||
|
||||
// Skip swap partitions and unmounted devices
|
||||
if mount_point == "[SWAP]" || mount_point.is_empty() {
|
||||
let fs_type = parts[2];
|
||||
|
||||
// Skip pseudo filesystems and fuse mounts
|
||||
if fs_type.starts_with("fuse") ||
|
||||
matches!(fs_type, "proc" | "sysfs" | "tmpfs" | "devtmpfs" |
|
||||
"devpts" | "cgroup" | "cgroup2" | "pstore" | "bpf" |
|
||||
"tracefs" | "debugfs" | "securityfs" | "hugetlbfs" |
|
||||
"mqueue" | "configfs" | "autofs") {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Convert device name to full path
|
||||
let device_path = format!("/dev/{}", device_name);
|
||||
mount_devices.insert(mount_point.to_string(), device_path);
|
||||
|
||||
mount_devices.insert(mount_point.to_string(), device.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -187,44 +238,20 @@ impl DiskCollector {
         Ok(())
     }

-    /// Get filesystem info for a single mount point
+    /// Get filesystem info for a single mount point using statvfs syscall
     fn get_filesystem_info(&self, mount_point: &str) -> Result<(u64, u64), CollectorError> {
-        let output = std::process::Command::new("timeout")
-            .args(&["2", "df", "--block-size=1", mount_point])
-            .output()
-            .map_err(|e| CollectorError::SystemRead {
-                path: format!("df {}", mount_point),
-                error: e.to_string(),
-            })?;
+        use nix::sys::statvfs::statvfs;

-        let output_str = String::from_utf8_lossy(&output.stdout);
-        let lines: Vec<&str> = output_str.lines().collect();
-
-        if lines.len() < 2 {
-            return Err(CollectorError::Parse {
-                value: output_str.to_string(),
-                error: "Expected at least 2 lines from df output".to_string(),
-            });
-        }
-
-        // Parse the data line (skip header)
-        let parts: Vec<&str> = lines[1].split_whitespace().collect();
-        if parts.len() < 4 {
-            return Err(CollectorError::Parse {
-                value: lines[1].to_string(),
-                error: "Expected at least 4 fields in df output".to_string(),
-            });
-        }
-
-        let total_bytes: u64 = parts[1].parse().map_err(|e| CollectorError::Parse {
-            value: parts[1].to_string(),
-            error: format!("Failed to parse total bytes: {}", e),
+        let stat = statvfs(mount_point).map_err(|e| CollectorError::SystemRead {
+            path: mount_point.to_string(),
+            error: format!("statvfs failed: {}", e),
         })?;

-        let used_bytes: u64 = parts[2].parse().map_err(|e| CollectorError::Parse {
-            value: parts[2].to_string(),
-            error: format!("Failed to parse used bytes: {}", e),
-        })?;
+        // Calculate total and used bytes
+        let block_size = stat.fragment_size() as u64;
+        let total_bytes = stat.blocks() as u64 * block_size;
+        let available_bytes = stat.blocks_available() as u64 * block_size;
+        let used_bytes = total_bytes - available_bytes;

         Ok((total_bytes, used_bytes))
     }
@@ -530,9 +557,6 @@ impl DiskCollector {

     /// Populate drives data into AgentData
     fn populate_drives_data(&self, physical_drives: &[PhysicalDrive], smart_data: &HashMap<String, SmartData>, agent_data: &mut AgentData) -> Result<(), CollectorError> {
-        // Clear existing drives data to prevent duplicates in cached architecture
-        agent_data.system.storage.drives.clear();
-
         for drive in physical_drives {
             let smart = smart_data.get(&drive.name);

@@ -570,9 +594,6 @@ impl DiskCollector {

     /// Populate pools data into AgentData
     fn populate_pools_data(&self, mergerfs_pools: &[MergerfsPool], smart_data: &HashMap<String, SmartData>, agent_data: &mut AgentData) -> Result<(), CollectorError> {
-        // Clear existing pools data to prevent duplicates in cached architecture
-        agent_data.system.storage.pools.clear();
-
         for pool in mergerfs_pools {
             // Calculate pool health and statuses based on member drive health
             let (pool_health, health_status, usage_status, data_drive_data, parity_drive_data) = self.calculate_pool_health(pool, smart_data);
@@ -766,32 +787,29 @@ impl DiskCollector {
         Ok((data_drives, parity_drives))
     }

-    /// Get drive information for a mount path
+    /// Get drive information for a mount path by reading /proc/mounts
     fn get_drive_info_for_path(&self, path: &str) -> anyhow::Result<PoolDrive> {
-        // Use lsblk to find the backing device with timeout
-        let output = Command::new("timeout")
-            .args(&["2", "lsblk", "-rn", "-o", "NAME,MOUNTPOINT"])
-            .output()
-            .map_err(|e| anyhow::anyhow!("Failed to run lsblk: {}", e))?;
-
-        let output_str = String::from_utf8_lossy(&output.stdout);
+        // Read /proc/mounts to find the backing device
+        let content = std::fs::read_to_string("/proc/mounts")
+            .map_err(|e| anyhow::anyhow!("Failed to read /proc/mounts: {}", e))?;

         let mut device = String::new();

-        for line in output_str.lines() {
+        for line in content.lines() {
             let parts: Vec<&str> = line.split_whitespace().collect();
             if parts.len() >= 2 && parts[1] == path {
                 device = parts[0].to_string();
                 break;
             }
         }

         if device.is_empty() {
             return Err(anyhow::anyhow!("Could not find device for path {}", path));
         }

-        // Extract base device name (e.g., "sda1" -> "sda")
-        let base_device = self.extract_base_device(&format!("/dev/{}", device));
+        // Extract base device name (e.g., "/dev/sda1" -> "sda")
+        let base_device = self.extract_base_device(&device);

         // Get temperature from SMART data if available
         let temperature = if let Ok(smart_data) = tokio::task::block_in_place(|| {
             tokio::runtime::Handle::current().block_on(self.get_smart_data(&base_device))
@@ -800,7 +818,7 @@ impl DiskCollector {
         } else {
             None
         };

         Ok(PoolDrive {
             name: base_device,
             mount_point: path.to_string(),
@@ -844,7 +862,15 @@ impl DiskCollector {
 #[async_trait]
 impl Collector for DiskCollector {
     async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
-        self.collect_storage_data(agent_data).await
+        // Use cached data if available and fresh
+        if let Some((cached_drives, cached_pools)) = self.get_cached_data() {
+            agent_data.system.storage.drives = cached_drives;
+            agent_data.system.storage.pools = cached_pools;
+            Ok(())
+        } else {
+            // Collect fresh data
+            self.collect_storage_data(agent_data).await
+        }
     }
 }
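The new collect_structured above calls a self.get_cached_data() helper that this diff does not show. A minimal sketch of what such a freshness-checked accessor could look like, assuming an RwLock-guarded cache with an Instant timestamp; the type name, field layout, and max-age rule here are placeholders, not the repository's actual definitions:

use std::sync::RwLock;
use std::time::Instant;

/// Hypothetical freshness-checked cache; the real collector may organize this differently.
struct StorageCache<D, P> {
    inner: RwLock<Option<(Vec<D>, Vec<P>, Instant)>>,
    max_age_secs: u64,
}

impl<D: Clone, P: Clone> StorageCache<D, P> {
    /// Return cached drives/pools if they are younger than `max_age_secs`,
    /// otherwise None so the caller re-collects fresh data.
    fn get_cached_data(&self) -> Option<(Vec<D>, Vec<P>)> {
        let guard = self.inner.read().ok()?;
        let (drives, pools, collected_at) = guard.as_ref()?;
        if collected_at.elapsed().as_secs() < self.max_age_secs {
            Some((drives.clone(), pools.clone()))
        } else {
            None
        }
    }

    /// Store freshly collected data with the current timestamp.
    fn store(&self, drives: Vec<D>, pools: Vec<P>) {
        if let Ok(mut guard) = self.inner.write() {
            *guard = Some((drives, pools, Instant::now()));
        }
    }
}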
@@ -95,10 +95,9 @@ impl MemoryCollector {
         Ok(())
     }

-    /// Populate tmpfs data into AgentData
+    /// Populate tmpfs data into AgentData using statvfs syscall
     async fn populate_tmpfs_data(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
-        // Clear existing tmpfs data to prevent duplicates in cached architecture
-        agent_data.system.memory.tmpfs.clear();
+        use nix::sys::statvfs::statvfs;

         // Discover all tmpfs mount points
         let tmpfs_mounts = self.discover_tmpfs_mounts()?;
@@ -108,52 +107,35 @@ impl MemoryCollector {
             return Ok(());
         }

-        // Get usage data for all tmpfs mounts at once using df (with 2 second timeout)
-        let mut df_args = vec!["2", "df", "--output=target,size,used", "--block-size=1"];
-        df_args.extend(tmpfs_mounts.iter().map(|s| s.as_str()));
-
-        let df_output = std::process::Command::new("timeout")
-            .args(&df_args[..])
-            .output()
-            .map_err(|e| CollectorError::SystemRead {
-                path: "tmpfs mounts".to_string(),
-                error: e.to_string(),
-            })?;
-
-        let df_str = String::from_utf8_lossy(&df_output.stdout);
-        let df_lines: Vec<&str> = df_str.lines().skip(1).collect(); // Skip header
-
-        // Process each tmpfs mount
-        for (i, mount_point) in tmpfs_mounts.iter().enumerate() {
-            if i >= df_lines.len() {
-                debug!("Not enough df output lines for tmpfs mount: {}", mount_point);
-                continue;
-            }
-
-            let parts: Vec<&str> = df_lines[i].split_whitespace().collect();
-            if parts.len() < 3 {
-                debug!("Invalid df output for tmpfs mount: {}", mount_point);
-                continue;
-            }
-
-            let total_bytes: u64 = parts[1].parse().unwrap_or(0);
-            let used_bytes: u64 = parts[2].parse().unwrap_or(0);
-
-            if total_bytes == 0 {
-                continue;
-            }
-
-            let total_gb = total_bytes as f32 / (1024.0 * 1024.0 * 1024.0);
-            let used_gb = used_bytes as f32 / (1024.0 * 1024.0 * 1024.0);
-            let usage_percent = (used_bytes as f32 / total_bytes as f32) * 100.0;
-
-            // Add to tmpfs list
-            agent_data.system.memory.tmpfs.push(TmpfsData {
-                mount: mount_point.clone(),
-                usage_percent,
-                used_gb,
-                total_gb,
-            });
-        }
+        // Get usage data for each tmpfs mount using statvfs syscall
+        for mount_point in tmpfs_mounts {
+            match statvfs(mount_point.as_str()) {
+                Ok(stat) => {
+                    let block_size = stat.fragment_size() as u64;
+                    let total_bytes = stat.blocks() as u64 * block_size;
+                    let available_bytes = stat.blocks_available() as u64 * block_size;
+                    let used_bytes = total_bytes - available_bytes;
+
+                    if total_bytes == 0 {
+                        continue;
+                    }
+
+                    let total_gb = total_bytes as f32 / (1024.0 * 1024.0 * 1024.0);
+                    let used_gb = used_bytes as f32 / (1024.0 * 1024.0 * 1024.0);
+                    let usage_percent = (used_bytes as f32 / total_bytes as f32) * 100.0;
+
+                    // Add to tmpfs list
+                    agent_data.system.memory.tmpfs.push(TmpfsData {
+                        mount: mount_point.clone(),
+                        usage_percent,
+                        used_gb,
+                        total_gb,
+                    });
+                }
+                Err(e) => {
+                    debug!("Failed to get stats for tmpfs mount {}: {}", mount_point, e);
+                }
+            }
+        }

         // Sort tmpfs mounts by mount point for consistent display order
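The TmpfsData struct literal above shows the full field set that the shared crate exposes for tmpfs mounts. For reference, a sketch of how that record could be declared; only the field names and types are taken from the usage above, while the serde derives and doc comments are assumptions:

use serde::{Deserialize, Serialize};

/// Per-mount tmpfs usage snapshot as consumed by the dashboard UI (sketch).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmpfsData {
    /// Mount point, e.g. "/run" or "/tmp"
    pub mount: String,
    /// Used space as a percentage of the mount's total size
    pub usage_percent: f32,
    /// Used space in GiB
    pub used_gb: f32,
    /// Total size in GiB
    pub total_gb: f32,
}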
@@ -1,19 +1,24 @@
 use async_trait::async_trait;
 use cm_dashboard_shared::{AgentData, NetworkInterfaceData, Status};
-use std::process::Command;
 use tracing::debug;
+use futures::stream::TryStreamExt;
+use rtnetlink::{new_connection, IpVersion};
+use netlink_packet_route::link::LinkAttribute;
+use netlink_packet_route::address::AddressAttribute;
+use netlink_packet_route::route::RouteAttribute;
+use std::net::IpAddr;

 use super::{Collector, CollectorError};
 use crate::config::NetworkConfig;

 /// Network interface collector with physical/virtual classification and link status
 pub struct NetworkCollector {
-    config: NetworkConfig,
+    _config: NetworkConfig,
 }

 impl NetworkCollector {
     pub fn new(config: NetworkConfig) -> Self {
-        Self { config }
+        Self { _config: config }
     }

     /// Check if interface is physical (not virtual)
@@ -49,37 +54,52 @@ impl NetworkCollector {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the primary physical interface (the one with default route)
|
||||
fn get_primary_physical_interface(&self) -> Option<String> {
|
||||
let timeout_str = self.config.command_timeout_seconds.to_string();
|
||||
match Command::new("timeout").args([&timeout_str, "ip", "route", "show", "default"]).output() {
|
||||
Ok(output) if output.status.success() => {
|
||||
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||
// Parse: "default via 192.168.1.1 dev eno1 ..."
|
||||
for line in output_str.lines() {
|
||||
if line.starts_with("default") {
|
||||
if let Some(dev_pos) = line.find(" dev ") {
|
||||
let after_dev = &line[dev_pos + 5..];
|
||||
if let Some(space_pos) = after_dev.find(' ') {
|
||||
let interface = &after_dev[..space_pos];
|
||||
// Only return if it's a physical interface
|
||||
if Self::is_physical_interface(interface) {
|
||||
return Some(interface.to_string());
|
||||
}
|
||||
/// Get the primary physical interface (the one with default route) using rtnetlink
|
||||
async fn get_primary_physical_interface() -> Option<String> {
|
||||
let (connection, handle, _) = match new_connection() {
|
||||
Ok(conn) => conn,
|
||||
Err(e) => {
|
||||
debug!("Failed to create netlink connection: {}", e);
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
tokio::spawn(connection);
|
||||
|
||||
// Get default route
|
||||
let mut routes = handle.route().get(IpVersion::V4).execute();
|
||||
|
||||
while let Ok(Some(route)) = routes.try_next().await {
|
||||
// Check if this is a default route (destination is 0.0.0.0/0)
|
||||
if route.header.destination_prefix_length == 0 {
|
||||
// Find the output interface (OIF) attribute
|
||||
if let Some(oif) = route.attributes.iter().find_map(|attr| {
|
||||
if let RouteAttribute::Oif(index) = attr {
|
||||
Some(*index)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}) {
|
||||
// Get interface name from index
|
||||
let mut link = handle.link().get().match_index(oif).execute();
|
||||
if let Ok(Some(link_msg)) = link.try_next().await {
|
||||
if let Some(name) = link_msg.attributes.iter().find_map(|attr| {
|
||||
if let LinkAttribute::IfName(n) = attr {
|
||||
Some(n.to_string())
|
||||
} else {
|
||||
// No space after interface name (end of line)
|
||||
let interface = after_dev.trim();
|
||||
if Self::is_physical_interface(interface) {
|
||||
return Some(interface.to_string());
|
||||
}
|
||||
None
|
||||
}
|
||||
}) {
|
||||
if Self::is_physical_interface(&name) {
|
||||
return Some(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Parse VLAN configuration from /proc/net/vlan/config
|
||||
@@ -104,103 +124,105 @@ impl NetworkCollector {
|
||||
vlan_map
|
||||
}
|
||||
|
||||
/// Collect network interfaces using ip command
|
||||
/// Collect network interfaces using rtnetlink
|
||||
async fn collect_interfaces(&self) -> Vec<NetworkInterfaceData> {
|
||||
let mut interfaces = Vec::new();
|
||||
|
||||
// Parse VLAN configuration
|
||||
let vlan_map = Self::parse_vlan_config();
|
||||
|
||||
let timeout_str = self.config.command_timeout_seconds.to_string();
|
||||
match Command::new("timeout").args([&timeout_str, "ip", "-j", "addr"]).output() {
|
||||
Ok(output) if output.status.success() => {
|
||||
let json_str = String::from_utf8_lossy(&output.stdout);
|
||||
// Create netlink connection
|
||||
let (connection, handle, _) = match new_connection() {
|
||||
Ok(conn) => conn,
|
||||
Err(e) => {
|
||||
debug!("Failed to create netlink connection: {}", e);
|
||||
return interfaces;
|
||||
}
|
||||
};
|
||||
|
||||
if let Ok(json_data) = serde_json::from_str::<serde_json::Value>(&json_str) {
|
||||
if let Some(ifaces) = json_data.as_array() {
|
||||
for iface in ifaces {
|
||||
let name = iface["ifname"].as_str().unwrap_or("").to_string();
|
||||
tokio::spawn(connection);
|
||||
|
||||
// Skip loopback, empty names, and ifb* interfaces
|
||||
if name.is_empty() || name == "lo" || name.starts_with("ifb") {
|
||||
continue;
|
||||
}
|
||||
// Get all links
|
||||
let mut links = handle.link().get().execute();
|
||||
|
||||
// Parse parent interface from @parent notation (e.g., lan@enp0s31f6)
|
||||
let (interface_name, parent_interface) = if let Some(at_pos) = name.find('@') {
|
||||
let (child, parent) = name.split_at(at_pos);
|
||||
(child.to_string(), Some(parent[1..].to_string()))
|
||||
} else {
|
||||
(name.clone(), None)
|
||||
};
|
||||
while let Ok(Some(link)) = links.try_next().await {
|
||||
// Get interface name
|
||||
let name = match link.attributes.iter().find_map(|attr| {
|
||||
if let LinkAttribute::IfName(n) = attr {
|
||||
Some(n.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}) {
|
||||
Some(n) => n,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let mut ipv4_addresses = Vec::new();
|
||||
let mut ipv6_addresses = Vec::new();
|
||||
// Skip loopback and ifb interfaces
|
||||
if name == "lo" || name.starts_with("ifb") {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Extract IP addresses
|
||||
if let Some(addr_info) = iface["addr_info"].as_array() {
|
||||
for addr in addr_info {
|
||||
if let Some(family) = addr["family"].as_str() {
|
||||
if let Some(local) = addr["local"].as_str() {
|
||||
match family {
|
||||
"inet" => ipv4_addresses.push(local.to_string()),
|
||||
"inet6" => {
|
||||
// Skip link-local IPv6 addresses (fe80::)
|
||||
if !local.starts_with("fe80:") {
|
||||
ipv6_addresses.push(local.to_string());
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Parse parent interface from @parent notation (e.g., lan@enp0s31f6)
|
||||
let (interface_name, parent_interface) = if let Some(at_pos) = name.find('@') {
|
||||
let (child, parent) = name.split_at(at_pos);
|
||||
(child.to_string(), Some(parent[1..].to_string()))
|
||||
} else {
|
||||
(name.clone(), None)
|
||||
};
|
||||
|
||||
// Get IP addresses for this interface
|
||||
let mut ipv4_addresses = Vec::new();
|
||||
let mut ipv6_addresses = Vec::new();
|
||||
|
||||
let mut addrs = handle.address().get().set_link_index_filter(link.header.index).execute();
|
||||
while let Ok(Some(addr)) = addrs.try_next().await {
|
||||
for nla in &addr.attributes {
|
||||
if let AddressAttribute::Address(ip) = nla {
|
||||
match ip {
|
||||
IpAddr::V4(ipv4) => ipv4_addresses.push(ipv4.to_string()),
|
||||
IpAddr::V6(ipv6) => {
|
||||
// Skip link-local IPv6 addresses (fe80::)
|
||||
if !ipv6.to_string().starts_with("fe80:") {
|
||||
ipv6_addresses.push(ipv6.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// Determine if physical and get status
|
||||
let is_physical = Self::is_physical_interface(&interface_name);
|
||||
|
||||
// Only filter out virtual interfaces without IPs
|
||||
// Physical interfaces should always be shown even if down/no IPs
|
||||
if !is_physical && ipv4_addresses.is_empty() && ipv6_addresses.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let link_status = if is_physical {
|
||||
Self::get_link_status(&name)
|
||||
} else {
|
||||
Status::Unknown // Virtual interfaces don't have meaningful link status
|
||||
};
|
||||
|
||||
// Look up VLAN ID from the map (use original name before @ parsing)
|
||||
let vlan_id = vlan_map.get(&name).copied();
|
||||
|
||||
interfaces.push(NetworkInterfaceData {
|
||||
name: interface_name,
|
||||
ipv4_addresses,
|
||||
ipv6_addresses,
|
||||
is_physical,
|
||||
link_status,
|
||||
parent_interface,
|
||||
vlan_id,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("Failed to execute ip command: {}", e);
|
||||
}
|
||||
Ok(output) => {
|
||||
debug!("ip command failed with status: {}", output.status);
|
||||
|
||||
// Determine if physical
|
||||
let is_physical = Self::is_physical_interface(&interface_name);
|
||||
|
||||
// Only filter out virtual interfaces without IPs
|
||||
if !is_physical && ipv4_addresses.is_empty() && ipv6_addresses.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let link_status = if is_physical {
|
||||
Self::get_link_status(&name)
|
||||
} else {
|
||||
Status::Unknown
|
||||
};
|
||||
|
||||
// Look up VLAN ID
|
||||
let vlan_id = vlan_map.get(&name).copied();
|
||||
|
||||
interfaces.push(NetworkInterfaceData {
|
||||
name: interface_name,
|
||||
ipv4_addresses,
|
||||
ipv6_addresses,
|
||||
is_physical,
|
||||
link_status,
|
||||
parent_interface,
|
||||
vlan_id,
|
||||
});
|
||||
}
|
||||
|
||||
// Assign primary physical interface as parent to virtual interfaces without explicit parent
|
||||
let primary_interface = self.get_primary_physical_interface();
|
||||
if let Some(primary) = primary_interface {
|
||||
// Assign primary physical interface as parent to virtual interfaces
|
||||
if let Some(primary) = Self::get_primary_physical_interface().await {
|
||||
for interface in interfaces.iter_mut() {
|
||||
// Only assign parent to virtual interfaces that don't already have one
|
||||
if !interface.is_physical && interface.parent_interface.is_none() {
|
||||
interface.parent_interface = Some(primary.clone());
|
||||
}
|
||||
|
||||
@@ -5,6 +5,8 @@ use std::process::Command;
 use std::sync::RwLock;
 use std::time::Instant;
 use tracing::{debug, warn};
+use bollard::Docker;
+use bollard::container::ListContainersOptions;

 use super::{Collector, CollectorError};
 use crate::config::SystemdConfig;
@@ -74,7 +76,7 @@ impl SystemdCollector {
         debug!("Collecting systemd services metrics");

         // Get cached services (discovery only happens when needed)
-        let monitored_services = match self.get_monitored_services() {
+        let monitored_services = match self.get_monitored_services().await {
             Ok(services) => services,
             Err(e) => {
                 debug!("Failed to get monitored services: {}", e);
@@ -94,7 +96,7 @@ impl SystemdCollector {

             // Sub-service metrics for specific services (always include cached results)
             if service_name.contains("nginx") && active_status == "active" {
-                let nginx_sites = self.get_nginx_site_metrics();
+                let nginx_sites = self.get_nginx_site_metrics().await;
                 for (site_name, latency_ms) in nginx_sites {
                     let site_status = if latency_ms >= 0.0 && latency_ms < self.config.nginx_latency_critical_ms {
                         "active"
@@ -119,7 +121,7 @@ impl SystemdCollector {
             }

             if service_name.contains("docker") && active_status == "active" {
-                let docker_containers = self.get_docker_containers();
+                let docker_containers = self.get_docker_containers().await;
                 for (container_name, container_status) in docker_containers {
                     // For now, docker containers have no additional metrics
                     // Future: could add memory_mb, cpu_percent, restart_count, etc.
@@ -134,7 +136,7 @@ impl SystemdCollector {
             }

             // Add Docker images
-            let docker_images = self.get_docker_images();
+            let docker_images = self.get_docker_images().await;
             for (image_name, image_status, image_size_mb) in docker_images {
                 let mut metrics = Vec::new();
                 metrics.push(SubServiceMetric {
@@ -190,7 +192,7 @@ impl SystemdCollector {
     }

     /// Get monitored services, discovering them if needed or cache is expired
-    fn get_monitored_services(&self) -> Result<Vec<String>> {
+    async fn get_monitored_services(&self) -> Result<Vec<String>> {
         // Check if we need discovery without holding the lock
         let needs_discovery = {
             let state = self.state.read().unwrap();
@@ -205,7 +207,7 @@ impl SystemdCollector {

         if needs_discovery {
             debug!("Discovering systemd services (cache expired or first run)");
-            match self.discover_services_internal() {
+            match self.discover_services_internal().await {
                 Ok((services, status_cache)) => {
                     if let Ok(mut state) = self.state.write() {
                         state.monitored_services = services.clone();
@@ -228,45 +230,52 @@ impl SystemdCollector {
|
||||
}
|
||||
|
||||
/// Get nginx site metrics, checking them if cache is expired (like old working version)
|
||||
fn get_nginx_site_metrics(&self) -> Vec<(String, f32)> {
|
||||
let mut state = self.state.write().unwrap();
|
||||
|
||||
// Check if we need to refresh nginx site metrics
|
||||
let needs_refresh = match state.last_nginx_check_time {
|
||||
None => true, // First time
|
||||
Some(last_time) => {
|
||||
let elapsed = last_time.elapsed().as_secs();
|
||||
elapsed >= state.nginx_check_interval_seconds
|
||||
async fn get_nginx_site_metrics(&self) -> Vec<(String, f32)> {
|
||||
// Check if we need to refresh (read lock)
|
||||
let needs_refresh = {
|
||||
let state = self.state.read().unwrap();
|
||||
match state.last_nginx_check_time {
|
||||
None => true,
|
||||
Some(last_time) => {
|
||||
let elapsed = last_time.elapsed().as_secs();
|
||||
elapsed >= state.nginx_check_interval_seconds
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if needs_refresh {
|
||||
// Only check nginx sites if nginx service is active
|
||||
if state.monitored_services.iter().any(|s| s.contains("nginx")) {
|
||||
let fresh_metrics = self.get_nginx_sites_internal();
|
||||
// Check if nginx is active (read lock)
|
||||
let has_nginx = {
|
||||
let state = self.state.read().unwrap();
|
||||
state.monitored_services.iter().any(|s| s.contains("nginx"))
|
||||
};
|
||||
|
||||
if has_nginx {
|
||||
let fresh_metrics = self.get_nginx_sites_internal().await;
|
||||
let mut state = self.state.write().unwrap();
|
||||
state.nginx_site_metrics = fresh_metrics;
|
||||
state.last_nginx_check_time = Some(Instant::now());
|
||||
}
|
||||
}
|
||||
|
||||
let state = self.state.read().unwrap();
|
||||
state.nginx_site_metrics.clone()
|
||||
}
|
||||
|
||||
/// Auto-discover interesting services to monitor
|
||||
fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
|
||||
// First: Get all service unit files
|
||||
let timeout_str = self.config.command_timeout_seconds.to_string();
|
||||
/// Auto-discover interesting services to monitor using systemctl
|
||||
async fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
|
||||
// First: Get all service unit files (with 3 second timeout)
|
||||
let unit_files_output = Command::new("timeout")
|
||||
.args(&[&timeout_str, "systemctl", "list-unit-files", "--type=service", "--no-pager", "--plain"])
|
||||
.args(&["3", "systemctl", "list-unit-files", "--type=service", "--no-pager", "--plain"])
|
||||
.output()?;
|
||||
|
||||
if !unit_files_output.status.success() {
|
||||
return Err(anyhow::anyhow!("systemctl list-unit-files command failed"));
|
||||
}
|
||||
|
||||
// Second: Get runtime status of all units
|
||||
// Second: Get runtime status of all units (with 3 second timeout)
|
||||
let units_status_output = Command::new("timeout")
|
||||
.args(&[&timeout_str, "systemctl", "list-units", "--type=service", "--all", "--no-pager", "--plain"])
|
||||
.args(&["3", "systemctl", "list-units", "--type=service", "--all", "--no-pager", "--plain"])
|
||||
.output()?;
|
||||
|
||||
if !units_status_output.status.success() {
|
||||
@@ -346,9 +355,9 @@ impl SystemdCollector {
|
||||
Ok((services, status_cache))
|
||||
}
|
||||
|
||||
/// Get service status from cache (if available) or fallback to systemctl
|
||||
/// Get service status from D-Bus cache
|
||||
fn get_service_status(&self, service: &str) -> Result<(String, String)> {
|
||||
// Try to get status from cache first
|
||||
// Get status from D-Bus cache (populated by discover_services_internal)
|
||||
if let Ok(state) = self.state.read() {
|
||||
if let Some(cached_info) = state.service_status_cache.get(service) {
|
||||
let active_status = cached_info.active_state.clone();
|
||||
@@ -362,21 +371,25 @@ impl SystemdCollector {
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to systemctl if not in cache
|
||||
let timeout_str = self.config.command_timeout_seconds.to_string();
|
||||
let output = Command::new("timeout")
|
||||
.args(&[&timeout_str, "systemctl", "is-active", &format!("{}.service", service)])
|
||||
.output()?;
|
||||
// Service not found in D-Bus cache - treat as inactive
|
||||
Ok(("inactive".to_string(), "LoadState=not-found\nActiveState=inactive\nSubState=dead".to_string()))
|
||||
}
|
||||
|
||||
let active_status = String::from_utf8(output.stdout)?.trim().to_string();
|
||||
/// Get a unit property via systemctl show
|
||||
fn get_unit_property(&self, service_name: &str, property: &str) -> Option<String> {
|
||||
let output = Command::new("systemctl")
|
||||
.args(&["show", &format!("{}.service", service_name), &format!("--property={}", property)])
|
||||
.output()
|
||||
.ok()?;
|
||||
|
||||
// Get more detailed info
|
||||
let output = Command::new("timeout")
|
||||
.args(&[&timeout_str, "systemctl", "show", &format!("{}.service", service), "--property=LoadState,ActiveState,SubState"])
|
||||
.output()?;
|
||||
if !output.status.success() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let detailed_info = String::from_utf8(output.stdout)?;
|
||||
Ok((active_status, detailed_info))
|
||||
let output_str = String::from_utf8(output.stdout).ok()?;
|
||||
// Parse "PropertyName=value" format
|
||||
let value = output_str.trim().strip_prefix(&format!("{}=", property))?;
|
||||
Some(value.to_string())
|
||||
}
|
||||
|
||||
/// Check if service name matches pattern (supports wildcards like nginx*)
|
||||
@@ -433,37 +446,24 @@ impl SystemdCollector {
|
||||
}
|
||||
|
||||
// No configured path - try to get WorkingDirectory from systemctl
|
||||
let timeout_str = self.config.command_timeout_seconds.to_string();
|
||||
let output = Command::new("timeout")
|
||||
.args(&[&timeout_str, "systemctl", "show", &format!("{}.service", service_name), "--property=WorkingDirectory"])
|
||||
.output()
|
||||
.map_err(|e| CollectorError::SystemRead {
|
||||
path: format!("WorkingDirectory for {}", service_name),
|
||||
error: e.to_string(),
|
||||
})?;
|
||||
|
||||
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||
for line in output_str.lines() {
|
||||
if line.starts_with("WorkingDirectory=") && !line.contains("[not set]") {
|
||||
let dir = line.strip_prefix("WorkingDirectory=").unwrap_or("");
|
||||
if !dir.is_empty() && dir != "/" {
|
||||
return Ok(self.get_directory_size(dir).await.unwrap_or(0.0));
|
||||
}
|
||||
if let Some(dir_str) = self.get_unit_property(service_name, "WorkingDirectory") {
|
||||
if !dir_str.is_empty() && dir_str != "/" && dir_str != "[not set]" {
|
||||
return Ok(self.get_directory_size(&dir_str).await.unwrap_or(0.0));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(0.0)
|
||||
}
|
||||
|
||||
/// Get size of a directory in GB
|
||||
/// Get size of a directory in GB (with 2 second timeout)
|
||||
async fn get_directory_size(&self, path: &str) -> Option<f32> {
|
||||
use super::run_command_with_timeout;
|
||||
|
||||
// Use -s (summary) and --apparent-size for speed
|
||||
// Use -s (summary) and --apparent-size for speed, 2 second timeout
|
||||
let mut cmd = Command::new("sudo");
|
||||
cmd.args(&["du", "-s", "--apparent-size", "--block-size=1", path]);
|
||||
|
||||
let output = run_command_with_timeout(cmd, self.config.command_timeout_seconds).await.ok()?;
|
||||
let output = run_command_with_timeout(cmd, 2).await.ok()?;
|
||||
|
||||
if !output.status.success() {
|
||||
// Log permission errors for debugging but don't spam logs
|
||||
@@ -510,27 +510,13 @@ impl SystemdCollector {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get memory usage for a specific service
|
||||
/// Get memory usage for a specific service via systemctl
|
||||
async fn get_service_memory_usage(&self, service_name: &str) -> Result<f32, CollectorError> {
|
||||
let output = Command::new("systemctl")
|
||||
.args(&["show", &format!("{}.service", service_name), "--property=MemoryCurrent"])
|
||||
.output()
|
||||
.map_err(|e| CollectorError::SystemRead {
|
||||
path: format!("memory usage for {}", service_name),
|
||||
error: e.to_string(),
|
||||
})?;
|
||||
|
||||
let output_str = String::from_utf8_lossy(&output.stdout);
|
||||
|
||||
for line in output_str.lines() {
|
||||
if line.starts_with("MemoryCurrent=") {
|
||||
if let Some(mem_str) = line.strip_prefix("MemoryCurrent=") {
|
||||
if mem_str != "[not set]" {
|
||||
if let Ok(memory_bytes) = mem_str.parse::<u64>() {
|
||||
return Ok(memory_bytes as f32 / (1024.0 * 1024.0)); // Convert to MB
|
||||
}
|
||||
}
|
||||
}
|
||||
// Get MemoryCurrent property from systemctl
|
||||
if let Some(value_str) = self.get_unit_property(service_name, "MemoryCurrent") {
|
||||
// MemoryCurrent is in bytes or could be "[not set]"
|
||||
if let Ok(memory_bytes) = value_str.parse::<u64>() {
|
||||
return Ok(memory_bytes as f32 / (1024.0 * 1024.0)); // Convert to MB
|
||||
}
|
||||
}
|
||||
|
||||
@@ -544,7 +530,7 @@ impl SystemdCollector {
|
||||
match state.last_collection {
|
||||
None => true,
|
||||
Some(last) => {
|
||||
let cache_duration = std::time::Duration::from_secs(30);
|
||||
let cache_duration = std::time::Duration::from_secs(self.config.interval_seconds);
|
||||
last.elapsed() > cache_duration
|
||||
}
|
||||
}
|
||||
@@ -561,11 +547,11 @@ impl SystemdCollector {
|
||||
}
|
||||
|
||||
/// Get nginx sites with latency checks (internal - no caching)
|
||||
fn get_nginx_sites_internal(&self) -> Vec<(String, f32)> {
|
||||
async fn get_nginx_sites_internal(&self) -> Vec<(String, f32)> {
|
||||
let mut sites = Vec::new();
|
||||
|
||||
// Discover nginx sites from configuration
|
||||
let discovered_sites = self.discover_nginx_sites();
|
||||
let discovered_sites = self.discover_nginx_sites().await;
|
||||
|
||||
// Always add all discovered sites, even if checks fail (like old version)
|
||||
for (site_name, url) in &discovered_sites {
|
||||
@@ -584,9 +570,9 @@ impl SystemdCollector {
|
||||
}
|
||||
|
||||
/// Discover nginx sites from configuration
|
||||
fn discover_nginx_sites(&self) -> Vec<(String, String)> {
|
||||
async fn discover_nginx_sites(&self) -> Vec<(String, String)> {
|
||||
// Use the same approach as the old working agent: get nginx config from systemd
|
||||
let config_content = match self.get_nginx_config_from_systemd() {
|
||||
let config_content = match self.get_nginx_config_from_systemd().await {
|
||||
Some(content) => content,
|
||||
None => {
|
||||
debug!("Could not get nginx config from systemd, trying nginx -T fallback");
|
||||
@@ -619,30 +605,16 @@ impl SystemdCollector {
|
||||
Some(String::from_utf8_lossy(&output.stdout).to_string())
|
||||
}
|
||||
|
||||
/// Get nginx config from systemd service definition (NixOS compatible)
|
||||
fn get_nginx_config_from_systemd(&self) -> Option<String> {
|
||||
let output = Command::new("systemctl")
|
||||
.args(&["show", "nginx", "--property=ExecStart", "--no-pager"])
|
||||
.output()
|
||||
.ok()?;
|
||||
/// Get nginx config from systemd service definition via systemctl (NixOS compatible)
|
||||
async fn get_nginx_config_from_systemd(&self) -> Option<String> {
|
||||
// Get ExecStart property from systemctl
|
||||
let exec_start_str = self.get_unit_property("nginx", "ExecStart")?;
|
||||
debug!("nginx ExecStart from systemctl: {}", exec_start_str);
|
||||
|
||||
if !output.status.success() {
|
||||
debug!("Failed to get nginx ExecStart from systemd");
|
||||
return None;
|
||||
}
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
debug!("systemctl show nginx output: {}", stdout);
|
||||
|
||||
// Parse ExecStart to extract -c config path
|
||||
for line in stdout.lines() {
|
||||
if line.starts_with("ExecStart=") {
|
||||
debug!("Found ExecStart line: {}", line);
|
||||
if let Some(config_path) = self.extract_config_path_from_exec_start(line) {
|
||||
debug!("Extracted config path: {}", config_path);
|
||||
return std::fs::read_to_string(&config_path).ok();
|
||||
}
|
||||
}
|
||||
// Extract config path from ExecStart structure
|
||||
if let Some(config_path) = self.extract_config_path_from_exec_start(&exec_start_str) {
|
||||
debug!("Extracted config path: {}", config_path);
|
||||
return std::fs::read_to_string(&config_path).ok();
|
||||
}
|
||||
|
||||
None
|
||||
@@ -784,96 +756,91 @@ impl SystemdCollector {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get docker containers as sub-services
|
||||
fn get_docker_containers(&self) -> Vec<(String, String)> {
|
||||
/// Get docker containers as sub-services using bollard API
|
||||
async fn get_docker_containers(&self) -> Vec<(String, String)> {
|
||||
let mut containers = Vec::new();
|
||||
|
||||
// Check if docker is available (cm-agent user is in docker group)
|
||||
// Use -a to show ALL containers (running and stopped)
|
||||
let timeout_str = self.config.command_timeout_seconds.to_string();
|
||||
let output = Command::new("timeout")
|
||||
.args(&[&timeout_str, "docker", "ps", "-a", "--format", "{{.Names}},{{.Status}}"])
|
||||
.output();
|
||||
|
||||
let output = match output {
|
||||
Ok(out) if out.status.success() => out,
|
||||
_ => return containers, // Docker not available or failed
|
||||
// Connect to Docker daemon
|
||||
let docker = match Docker::connect_with_local_defaults() {
|
||||
Ok(d) => d,
|
||||
Err(e) => {
|
||||
debug!("Failed to connect to Docker daemon: {}", e);
|
||||
return containers;
|
||||
}
|
||||
};
|
||||
|
||||
let output_str = match String::from_utf8(output.stdout) {
|
||||
Ok(s) => s,
|
||||
Err(_) => return containers,
|
||||
// List all containers (running and stopped)
|
||||
let list_options = Some(ListContainersOptions::<String> {
|
||||
all: true,
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
let container_list = match docker.list_containers(list_options).await {
|
||||
Ok(list) => list,
|
||||
Err(e) => {
|
||||
debug!("Failed to list Docker containers: {}", e);
|
||||
return containers;
|
||||
}
|
||||
};
|
||||
|
||||
for line in output_str.lines() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
for container in container_list {
|
||||
// Get container name (remove leading slash if present)
|
||||
let container_name = container.names
|
||||
.and_then(|names| names.first().map(|n| n.trim_start_matches('/').to_string()))
|
||||
.unwrap_or_else(|| container.id.clone().unwrap_or_default());
|
||||
|
||||
let parts: Vec<&str> = line.split(',').collect();
|
||||
if parts.len() >= 2 {
|
||||
let container_name = parts[0].trim();
|
||||
let status_str = parts[1].trim();
|
||||
// Map container state to service status
|
||||
let container_status = match container.state.as_deref() {
|
||||
Some("running") => "active",
|
||||
Some("exited") | Some("created") => "inactive",
|
||||
_ => "failed", // restarting, paused, dead, etc.
|
||||
};
|
||||
|
||||
let container_status = if status_str.contains("Up") {
|
||||
"active"
|
||||
} else if status_str.contains("Exited") || status_str.contains("Created") {
|
||||
"inactive" // Stopped/created containers are inactive
|
||||
} else {
|
||||
"failed" // Other states (restarting, paused, dead) → failed
|
||||
};
|
||||
|
||||
containers.push((format!("docker_{}", container_name), container_status.to_string()));
|
||||
}
|
||||
containers.push((format!("docker_{}", container_name), container_status.to_string()));
|
||||
}
|
||||
|
||||
containers
|
||||
}
|
||||
|
||||
/// Get docker images as sub-services
|
||||
fn get_docker_images(&self) -> Vec<(String, String, f32)> {
|
||||
/// Get docker images as sub-services using bollard API
|
||||
async fn get_docker_images(&self) -> Vec<(String, String, f32)> {
|
||||
let mut images = Vec::new();
|
||||
// Check if docker is available (cm-agent user is in docker group)
|
||||
let timeout_str = self.config.command_timeout_seconds.to_string();
|
||||
let output = Command::new("timeout")
|
||||
.args(&[&timeout_str, "docker", "images", "--format", "{{.Repository}}:{{.Tag}},{{.Size}}"])
|
||||
.output();
|
||||
|
||||
let output = match output {
|
||||
Ok(out) if out.status.success() => out,
|
||||
Ok(_) => {
|
||||
return images;
|
||||
}
|
||||
Err(_) => {
|
||||
// Connect to Docker daemon
|
||||
let docker = match Docker::connect_with_local_defaults() {
|
||||
Ok(d) => d,
|
||||
Err(e) => {
|
||||
debug!("Failed to connect to Docker daemon: {}", e);
|
||||
return images;
|
||||
}
|
||||
};
|
||||
|
||||
let output_str = match String::from_utf8(output.stdout) {
|
||||
Ok(s) => s,
|
||||
Err(_) => return images,
|
||||
// List all images
|
||||
let image_list = match docker.list_images::<String>(None).await {
|
||||
Ok(list) => list,
|
||||
Err(e) => {
|
||||
debug!("Failed to list Docker images: {}", e);
|
||||
return images;
|
||||
}
|
||||
};
|
||||
|
||||
for line in output_str.lines() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
for image in image_list {
|
||||
// Get image name from repo tags
|
||||
let image_names: Vec<String> = image.repo_tags
|
||||
.into_iter()
|
||||
.filter(|tag| !tag.contains("<none>"))
|
||||
.collect();
|
||||
|
||||
if image_names.is_empty() {
|
||||
continue; // Skip untagged images
|
||||
}
|
||||
|
||||
let parts: Vec<&str> = line.split(',').collect();
|
||||
if parts.len() >= 2 {
|
||||
let image_name = parts[0].trim();
|
||||
let size_str = parts[1].trim();
|
||||
|
||||
// Skip <none>:<none> images (dangling images)
|
||||
if image_name.contains("<none>") {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Parse size to MB (sizes come as "142MB", "1.5GB", "512kB", etc.)
|
||||
let size_mb = self.parse_docker_size(size_str);
|
||||
// Get size in MB
|
||||
let size_mb = image.size as f32 / (1024.0 * 1024.0);
|
||||
|
||||
for image_name in image_names {
|
||||
images.push((
|
||||
image_name.to_string(),
|
||||
image_name,
|
||||
"inactive".to_string(), // Images are informational - use inactive for neutral display
|
||||
size_mb
|
||||
));
|
||||
@@ -882,42 +849,11 @@ impl SystemdCollector {
|
||||
|
||||
images
|
||||
}
|
||||
|
||||
/// Parse Docker size string to MB
|
||||
fn parse_docker_size(&self, size_str: &str) -> f32 {
|
||||
let size_upper = size_str.to_uppercase();
|
||||
|
||||
// Extract numeric part and unit
|
||||
let mut num_str = String::new();
|
||||
let mut unit = String::new();
|
||||
|
||||
for ch in size_upper.chars() {
|
||||
if ch.is_ascii_digit() || ch == '.' {
|
||||
num_str.push(ch);
|
||||
} else if ch.is_alphabetic() {
|
||||
unit.push(ch);
|
||||
}
|
||||
}
|
||||
|
||||
let value: f32 = num_str.parse().unwrap_or(0.0);
|
||||
|
||||
// Convert to MB
|
||||
match unit.as_str() {
|
||||
"KB" | "K" => value / 1024.0,
|
||||
"MB" | "M" => value,
|
||||
"GB" | "G" => value * 1024.0,
|
||||
"TB" | "T" => value * 1024.0 * 1024.0,
|
||||
_ => value, // Assume bytes if no unit
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Collector for SystemdCollector {
|
||||
async fn collect_structured(&self, agent_data: &mut AgentData) -> Result<(), CollectorError> {
|
||||
// Clear existing services data to prevent duplicates in cached architecture
|
||||
agent_data.services.clear();
|
||||
|
||||
// Use cached complete data if available and fresh
|
||||
if let Some(cached_complete_services) = self.get_cached_complete_services() {
|
||||
for service_data in cached_complete_services {
|
||||
|
||||
@@ -1,65 +1,55 @@
 use anyhow::Result;
 use cm_dashboard_shared::{AgentData, MessageEnvelope};
 use tracing::{debug, info};
 use zmq::{Context, Socket, SocketType};

 use crate::config::ZmqConfig;

-/// ZMQ communication handler for receiving commands
-/// NOTE: Publishing is handled by dedicated thread in Agent::run()
+/// ZMQ communication handler for publishing metrics
 pub struct ZmqHandler {
-    command_receiver: Socket,
+    publisher: Socket,
 }

 impl ZmqHandler {
     pub async fn new(config: &ZmqConfig) -> Result<Self> {
         let context = Context::new();

-        // Create command receiver socket (PULL socket to receive commands from dashboard)
-        let command_receiver = context.socket(SocketType::PULL)?;
-        let cmd_bind_address = format!("tcp://{}:{}", config.bind_address, config.command_port);
-        command_receiver.bind(&cmd_bind_address)?;
+        // Create publisher socket for metrics
+        let publisher = context.socket(SocketType::PUB)?;
+        let pub_bind_address = format!("tcp://{}:{}", config.bind_address, config.publisher_port);
+        publisher.bind(&pub_bind_address)?;

-        info!("ZMQ command receiver bound to {}", cmd_bind_address);
+        info!("ZMQ publisher bound to {}", pub_bind_address);

-        // Set non-blocking mode for command receiver
-        command_receiver.set_rcvtimeo(0)?; // Non-blocking receive
-        command_receiver.set_linger(1000)?;
+        // Set socket options for efficiency
+        publisher.set_sndhwm(1000)?; // High water mark for outbound messages
+        publisher.set_linger(1000)?; // Linger time on close

         Ok(Self {
-            command_receiver,
+            publisher,
         })
     }

-    /// Try to receive a command (non-blocking)
-    pub fn try_receive_command(&self) -> Result<Option<AgentCommand>> {
-        match self.command_receiver.recv_bytes(zmq::DONTWAIT) {
-            Ok(bytes) => {
-                debug!("Received command message ({} bytes)", bytes.len());
-
-                let command: AgentCommand = serde_json::from_slice(&bytes)
-                    .map_err(|e| anyhow::anyhow!("Failed to deserialize command: {}", e))?;
-
-                debug!("Parsed command: {:?}", command);
-                Ok(Some(command))
-            }
-            Err(zmq::Error::EAGAIN) => {
-                // No message available (non-blocking)
-                Ok(None)
-            }
-            Err(e) => Err(anyhow::anyhow!("ZMQ receive error: {}", e)),
-        }
+    /// Publish agent data via ZMQ
+    pub async fn publish_agent_data(&self, data: &AgentData) -> Result<()> {
+        debug!(
+            "Publishing agent data for host {}",
+            data.hostname
+        );
+
+        // Create message envelope for agent data
+        let envelope = MessageEnvelope::agent_data(data.clone())
+            .map_err(|e| anyhow::anyhow!("Failed to create agent data envelope: {}", e))?;
+
+        // Serialize envelope
+        let serialized = serde_json::to_vec(&envelope)?;
+
+        // Send via ZMQ
+        self.publisher.send(&serialized, 0)?;
+
+        debug!("Published agent data message ({} bytes)", serialized.len());
+        Ok(())
     }
 }

 /// Commands that can be sent to the agent
 #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
 pub enum AgentCommand {
     /// Request immediate metric collection
     CollectNow,
     /// Change collection interval
     SetInterval { seconds: u64 },
     /// Enable/disable a collector
     ToggleCollector { name: String, enabled: bool },
     /// Request status/health check
     Ping,
 }
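For context, the PUB socket above is meant to be consumed by the dashboard side. A purely illustrative subscriber loop, assuming the standard zmq crate API and the JSON payload produced by publish_agent_data; the endpoint string and the use of serde_json::Value instead of the real MessageEnvelope type are assumptions, not code from this repository:

use anyhow::Result;

/// Illustrative dashboard-side subscriber matching the agent's PUB socket (sketch).
fn subscribe_to_agent(endpoint: &str) -> Result<()> {
    let context = zmq::Context::new();
    let subscriber = context.socket(zmq::SUB)?;
    subscriber.connect(endpoint)?; // e.g. "tcp://agent-host:<publisher_port>"
    subscriber.set_subscribe(b"")?; // receive every published message

    loop {
        // Each message is one JSON-serialized envelope sent by publish_agent_data()
        let bytes = subscriber.recv_bytes(0)?;
        let envelope: serde_json::Value = serde_json::from_slice(&bytes)?;
        println!("received envelope: {}", envelope);
    }
}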
@@ -20,7 +20,6 @@ pub struct AgentConfig {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ZmqConfig {
     pub publisher_port: u16,
-    pub command_port: u16,
     pub bind_address: String,
     pub transmission_interval_seconds: u64,
     /// Heartbeat transmission interval in seconds for host connectivity detection
@@ -79,9 +78,6 @@ pub struct DiskConfig {
     pub temperature_critical_celsius: f32,
     pub wear_warning_percent: f32,
     pub wear_critical_percent: f32,
-    /// Command timeout in seconds for lsblk, smartctl, etc.
-    #[serde(default = "default_disk_command_timeout")]
-    pub command_timeout_seconds: u64,
 }

 /// Filesystem configuration entry
@@ -111,9 +107,6 @@ pub struct SystemdConfig {
     pub http_timeout_seconds: u64,
     pub http_connect_timeout_seconds: u64,
     pub nginx_latency_critical_ms: f32,
-    /// Command timeout in seconds for systemctl, docker, du commands
-    #[serde(default = "default_systemd_command_timeout")]
-    pub command_timeout_seconds: u64,
 }

@@ -138,9 +131,6 @@ pub struct BackupConfig {
 pub struct NetworkConfig {
     pub enabled: bool,
     pub interval_seconds: u64,
-    /// Command timeout in seconds for ip route, ip addr commands
-    #[serde(default = "default_network_command_timeout")]
-    pub command_timeout_seconds: u64,
 }

 /// Notification configuration
@@ -154,9 +144,6 @@ pub struct NotificationConfig {
     pub rate_limit_minutes: u64,
     /// Email notification batching interval in seconds (default: 60)
     pub aggregation_interval_seconds: u64,
-    /// Status check interval in seconds for detecting changes (default: 30)
-    #[serde(default = "default_notification_check_interval")]
-    pub check_interval_seconds: u64,
     /// List of metric names to exclude from email notifications
     #[serde(default)]
     pub exclude_email_metrics: Vec<String>,
@@ -170,26 +157,10 @@ fn default_heartbeat_interval_seconds() -> u64 {
     5
 }

-fn default_notification_check_interval() -> u64 {
-    30
-}
-
 fn default_maintenance_mode_file() -> String {
     "/tmp/cm-maintenance".to_string()
 }

-fn default_disk_command_timeout() -> u64 {
-    30
-}
-
-fn default_systemd_command_timeout() -> u64 {
-    15
-}
-
-fn default_network_command_timeout() -> u64 {
-    10
-}
-
 impl AgentConfig {
     pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
         loader::load_config(path)

@@ -7,14 +7,6 @@ pub fn validate_config(config: &AgentConfig) -> Result<()> {
         bail!("ZMQ publisher port cannot be 0");
     }

-    if config.zmq.command_port == 0 {
-        bail!("ZMQ command port cannot be 0");
-    }
-
-    if config.zmq.publisher_port == config.zmq.command_port {
-        bail!("ZMQ publisher and command ports cannot be the same");
-    }
-
     if config.zmq.bind_address.is_empty() {
         bail!("ZMQ bind address cannot be empty");
     }
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard"
-version = "0.1.195"
+version = "0.1.200"
 edition = "2021"

 [dependencies]

@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-shared"
-version = "0.1.195"
+version = "0.1.200"
 edition = "2021"

 [dependencies]