Implements the ZMQ command protocol for dashboard-to-agent communication:
- Agents listen on port 6131 for REQ/REP commands.
- The dashboard sends a "refresh" command when the 'r' key is pressed.
- Agents force immediate collection of all metrics via force_refresh_all().
- Fresh data is broadcast to the dashboard immediately.
- The help text is updated to show "r: Refresh all metrics".

Also includes the foundation of a metric-level caching architecture for future granular control over individual metric update frequencies.
152 lines
5.2 KiB
Bash
Executable File
152 lines
5.2 KiB
Bash
Executable File
#!/bin/bash
#
# Test script for smart caching agent
# Debug why only System collector works but Services/SMART/Backup don't
#
# Must be run from the workspace root: the build output is referenced
# through the relative path ./target/release/.

# Abort on the first failing command (e.g. a failed build).
set -e

echo "=== CM Dashboard Smart Agent Debug Test ==="
echo "Testing smart caching implementation..."
echo

# Build the agent first
#
# The OpenSSL variables are passed as a one-shot environment for the cargo
# invocation only; they point at pinned Nix store paths, and
# OPENSSL_NO_VENDOR=1 is set alongside them.
echo "Building agent..."
OPENSSL_DIR=/nix/store/cz9k6nhxjppa1kmyf5npd0g8l89xzilw-openssl-3.5.2-dev \
OPENSSL_LIB_DIR=/nix/store/0837wpkjb27cr70bi3pc4g2rw5v9r63l-openssl-3.5.2/lib \
OPENSSL_INCLUDE_DIR=/nix/store/cz9k6nhxjppa1kmyf5npd0g8l89xzilw-openssl-3.5.2-dev/include \
PKG_CONFIG_PATH=/nix/store/cz9k6nhxjppa1kmyf5npd0g8l89xzilw-openssl-3.5.2-dev/lib/pkgconfig \
OPENSSL_NO_VENDOR=1 cargo build --workspace --release
echo "✓ Build completed"
echo

# Test 1: Verify agent starts and shows all collectors
#
# The agent's output goes straight into the log file instead of through
# `tee`. With `agent | tee ... &`, `$!` is the PID of `tee` (the last command
# of the pipeline), so the liveness check and the `kill` below would act on
# `tee` and leave the agent itself running until its 15s timeout expires --
# long enough to overlap with the second agent instance started in Test 4.
echo "Test 1: Agent startup and collector initialization"
timeout 15s ./target/release/cm-dashboard-agent -v > /tmp/agent_startup.log 2>&1 &
AGENT_PID=$!
sleep 8

# Still running after 8 seconds counts as a successful start.
agent_alive=0
if kill -0 "$AGENT_PID" 2>/dev/null; then
  agent_alive=1
  # Best effort: the agent may exit on its own between the check and the kill.
  kill "$AGENT_PID" 2>/dev/null || true
fi
# Reap the job in either case so no background process outlives this test.
wait "$AGENT_PID" 2>/dev/null || true

# Replay the captured output that `tee` used to stream to the terminal.
cat /tmp/agent_startup.log

if (( agent_alive )); then
  echo "✓ Smart agent started successfully"
else
  echo "✗ Smart agent failed to start"
  exit 1
fi
echo

# Test 2: Analyze startup logs for collector initialization
#
# Matching is case-insensitive so capitalised log text ("Cache", "Error",
# "Failed", ...) is reported here as well, in line with the runtime-log
# analysis in Test 4 and the counters in the summary.
# `|| true` keeps `set -e` from aborting the script when a pattern has no
# matches (grep exits 1 in that case).
echo "Test 2: Collector initialization analysis"
echo "Looking for collector setup messages:"
grep -iE "(monitoring|collector|initialized)" /tmp/agent_startup.log || true
echo

echo "Looking for cache-related messages:"
grep -iE "(cache|warming|tier)" /tmp/agent_startup.log || true
echo

echo "Looking for error messages:"
grep -iE "(error|failed)" /tmp/agent_startup.log || true
echo

# Test 3: Check if all expected collectors are mentioned

#######################################
# Report which of the given names occur in a log file.
# Arguments: $1 - path of the log file to search
#            $2.. - names to look for (matched as literal substrings)
# Outputs:   one "✓ Found: NAME" or "✗ Missing: NAME" line per name
# Returns:   0 always; a missing name is reported, not treated as an error
#######################################
report_collectors() {
  local log_file=$1
  local name
  shift
  for name in "$@"; do
    # -F: the names are plain text, not regular expressions.
    if grep -qF -- "$name" "$log_file"; then
      echo "✓ Found: $name"
    else
      echo "✗ Missing: $name"
    fi
  done
}

echo "Test 3: Expected collector verification"
EXPECTED_COLLECTORS=("SMART monitoring" "System monitoring" "Service monitoring" "Backup monitoring")
report_collectors /tmp/agent_startup.log "${EXPECTED_COLLECTORS[@]}"
echo

# Test 4: ZMQ message inspection (run agent for 20 seconds and capture messages)
echo "Test 4: ZMQ message capture and analysis"
echo "Starting agent and capturing ZMQ messages for 20 seconds..."

# Start the agent in background
timeout 25s ./target/release/cm-dashboard-agent -v > /tmp/agent_output.log 2>&1 &
AGENT_PID=$!

# Give agent time to start and warm cache
sleep 5

# Look for a listening TCP socket on the ZMQ port. The pattern is anchored
# to ":6130" followed by whitespace so that other occurrences of the digits
# (a longer port number, a PID in the process column) do not count.
echo "Checking ZMQ port 6130:"
ss -tlnp | grep -E ':6130[[:space:]]' || echo "ZMQ port not found"

# Monitor for a bit more
sleep 15

# Stop agent
if kill -0 "$AGENT_PID" 2>/dev/null; then
  # Best effort: the agent may exit between the check and the kill.
  kill "$AGENT_PID" 2>/dev/null || true
  wait "$AGENT_PID" 2>/dev/null || true
fi

echo "Agent output analysis:"
echo "Total lines of output: $(wc -l < /tmp/agent_output.log)"
echo

# The matches are captured first and the fallback is chosen from the captured
# text: with `grep ... | head -10 || echo ...` the exit status is that of
# `head`, which succeeds even on empty input, so the fallback never printed.
echo "Cache-related messages:"
cache_lines=$(grep -E "(cache|Cache|warming|Warming|tier|Tier)" /tmp/agent_output.log | head -10) || true
printf '%s\n' "${cache_lines:-No cache messages found}"
echo

echo "Collection messages:"
collection_lines=$(grep -E "(collection|Collection|collected|Collected)" /tmp/agent_output.log | head -10) || true
printf '%s\n' "${collection_lines:-No collection messages found}"
echo

echo "Error messages:"
grep -E "(error|Error|failed|Failed)" /tmp/agent_output.log || echo "No errors found"
echo

# Test 5: Check tier assignment
echo "Test 5: Cache tier analysis"
echo "Searching for tier assignments in startup:"
grep -E "(RealTime|Fast|Medium|Slow|Static)" /tmp/agent_startup.log || echo "No tier information found"
echo

# Test 6: Collection interval analysis
#
# POSIX extended regular expressions have no \d or \w escapes, so the digit
# and word classes are spelled out as bracket expressions. (With `grep -E`,
# "\d" does not mean "a digit", which kept the previous pattern from matching
# numeric intervals such as "5s intervals".)
INTERVAL_PATTERN='([0-9]+[[:alnum:]_]+ intervals|[0-9]+s intervals|[0-9]+min intervals)'

echo "Test 6: Collection interval verification"
echo "Expected intervals:"
echo "- System (RealTime): 5 seconds"
echo "- Services (Medium): 5 minutes"
echo "- SMART (Slow): 15 minutes"
echo "- Backup (Slow): 15 minutes"
echo

echo "Actual intervals found in logs:"
grep -E -- "$INTERVAL_PATTERN" /tmp/agent_startup.log || echo "No interval information found"
echo

# Test 7: Manual collector test (if possible)

#######################################
# Copy at most LIMIT lines from stdin to stdout, or print a fallback message
# when stdin produced no output.
# Used as the last stage of a pipeline in place of `head -N || echo MSG`:
# in that form the `||` sees the exit status of `head`, which succeeds on
# empty input, so the fallback message could never appear.
# Arguments: $1 - maximum number of lines to pass through
#            $2 - message printed when there is no input
# Outputs:   the first LIMIT input lines, or the message
# Returns:   0
#######################################
head_or_fallback() {
  local limit=$1 message=$2
  local lines
  lines=$(head -n "$limit")
  if [[ -n "$lines" ]]; then
    printf '%s\n' "$lines"
  else
    echo "$message"
  fi
}

echo "Test 7: Service discovery test"
echo "Checking what services would be discovered:"
# Only meaningful once the agent binary has been built.
if [ -f "./target/release/cm-dashboard-agent" ]; then
  echo "Services that should be monitored:"
  systemctl list-units --state=active --type=service \
    | grep -E "(gitea|immich|postgres|unifi|vaultwarden|nginx|docker|ssh)" \
    | head_or_fallback 5 "No interesting services found"
fi
echo

# Test 8: Check for threading issues
echo "Test 8: Threading and async analysis"
echo "Looking for async/threading issues:"
grep -E "(tokio|async|await|thread)" /tmp/agent_output.log \
  | head_or_fallback 5 "No async-related messages"
echo

#######################################
# Print the number of lines matching a pattern, as a single integer.
# `grep -c` already prints "0" when nothing matches but still exits 1, so
# `$(grep -c ... || echo "0")` yields "0" twice (on two lines). Here the
# exit status is ignored and "0" is substituted only when grep printed
# nothing at all (for example when the file does not exist).
# Arguments: passed through to `grep -c` (options, pattern, file)
# Outputs:   the match count on stdout
# Returns:   0
#######################################
count_matches() {
  local count
  count=$(grep -c "$@" 2>/dev/null) || true
  echo "${count:-0}"
}

echo "=== Test Summary ==="
echo "Agent startup log: /tmp/agent_startup.log"
echo "Agent runtime log: /tmp/agent_output.log"
echo

# The startup check only tests that the startup log file exists.
if [[ -f /tmp/agent_startup.log ]]; then
  startup_mark="✓"
else
  startup_mark="✗"
fi

echo "Key findings:"
echo "1. Agent starts: ${startup_mark}"
echo "2. Collectors found: $(count_matches "monitoring" /tmp/agent_startup.log)"
echo "3. Cache messages: $(count_matches -i cache /tmp/agent_output.log)"
echo "4. Errors found: $(count_matches -i error /tmp/agent_output.log)"
echo

echo "Next steps if issues found:"
echo "- Check collector initialization in smart_agent.rs"
echo "- Verify cache tier assignments and intervals"
echo "- Debug collection scheduling in collect_tier() method"
echo "- Test individual collectors outside of smart caching"