cm-dashboard/test_smart_agent.sh
Christoffer Martinsson 6bc7f97375 Add refresh shortkey 'r' for on-demand metrics refresh
Implements ZMQ command protocol for dashboard-to-agent communication:
- Agents listen on port 6131 for REQ/REP commands
- Dashboard sends "refresh" command when 'r' key is pressed
- Agents force immediate collection of all metrics via force_refresh_all()
- Fresh data is broadcast immediately to dashboard
- Updated help text to show "r: Refresh all metrics"

Also includes metric-level caching architecture foundation for future
granular control over individual metric update frequencies.
2025-10-15 22:30:04 +02:00

152 lines
5.2 KiB
Bash
Executable File

#!/usr/bin/env bash
# Test script for smart caching agent
# Debug why only System collector works but Services/SMART/Backup don't
#
# The interpreter is resolved through env instead of hard-coding /bin/bash:
# the build step below points at /nix/store paths, and NixOS does not ship
# /bin/bash, so the hard-coded shebang cannot be executed directly there.
#
# Paths such as ./target/release/... are relative, so run this script from
# the directory that contains the cargo workspace.

# Abort on the first unguarded command failure (for example a failed build).
set -e

echo "=== CM Dashboard Smart Agent Debug Test ==="
echo "Testing smart caching implementation..."
echo

# Build the agent first
echo "Building agent..."
# The OpenSSL locations are passed only to this cargo invocation (prefix
# assignments), pinning the build to the system OpenSSL from the Nix store
# rather than a vendored copy.
OPENSSL_DIR=/nix/store/cz9k6nhxjppa1kmyf5npd0g8l89xzilw-openssl-3.5.2-dev \
OPENSSL_LIB_DIR=/nix/store/0837wpkjb27cr70bi3pc4g2rw5v9r63l-openssl-3.5.2/lib \
OPENSSL_INCLUDE_DIR=/nix/store/cz9k6nhxjppa1kmyf5npd0g8l89xzilw-openssl-3.5.2-dev/include \
PKG_CONFIG_PATH=/nix/store/cz9k6nhxjppa1kmyf5npd0g8l89xzilw-openssl-3.5.2-dev/lib/pkgconfig \
OPENSSL_NO_VENDOR=1 cargo build --workspace --release
echo "✓ Build completed"
echo
# Test 1: Verify agent starts and shows all collectors
echo "Test 1: Agent startup and collector initialization"
# Log straight to the file instead of piping through tee. For a backgrounded
# pipeline, $! is the PID of the LAST stage (tee), so the liveness check and
# the kill below would act on tee and leave the agent running until its
# timeout expired, overlapping with the agent started by a later test.
timeout 15s ./target/release/cm-dashboard-agent -v > /tmp/agent_startup.log 2>&1 &
AGENT_PID=$!
sleep 8
# Show what the agent logged while starting (tee used to echo this live).
cat /tmp/agent_startup.log
if kill -0 "$AGENT_PID" 2>/dev/null; then
  echo "✓ Smart agent started successfully"
  # AGENT_PID is the timeout wrapper; GNU timeout relays the signal to the
  # agent it supervises. Failures are ignored because the process may have
  # exited between the check above and this kill.
  kill "$AGENT_PID" 2>/dev/null || true
  wait "$AGENT_PID" 2>/dev/null || true
else
  echo "✗ Smart agent failed to start"
  exit 1
fi
echo
# Test 2: Analyze startup logs for collector initialization
echo "Test 2: Collector initialization analysis"
# Each report section pairs a heading label with the extended regex used to
# pull matching lines out of the startup log.
startup_log=/tmp/agent_startup.log
section_labels=("collector setup" "cache-related" "error")
section_patterns=("(monitoring|collector|initialized)" "(cache|warming|tier)" "(error|failed|Error)")
for idx in "${!section_labels[@]}"; do
  echo "Looking for ${section_labels[$idx]} messages:"
  # A section with no matching lines is fine; do not let grep's non-zero
  # status abort the script.
  grep -E "${section_patterns[$idx]}" "$startup_log" || true
  echo
done
# Test 3: Check if all expected collectors are mentioned
echo "Test 3: Expected collector verification"
# Phrases the startup log is searched for, one per collector.
EXPECTED_COLLECTORS=(
  "SMART monitoring"
  "System monitoring"
  "Service monitoring"
  "Backup monitoring"
)
for collector in "${EXPECTED_COLLECTORS[@]}"; do
  # Assume the collector is absent until the log proves otherwise.
  verdict="✗ Missing"
  if grep -q "$collector" /tmp/agent_startup.log; then
    verdict="✓ Found"
  fi
  echo "$verdict: $collector"
done
echo
# Test 4: ZMQ message inspection (run agent for 20 seconds and capture messages)
# Runs the agent in the background with all of its output captured in
# /tmp/agent_output.log, checks for a listening socket, then stops the agent.
echo "Test 4: ZMQ message capture and analysis"
echo "Starting agent and capturing ZMQ messages for 20 seconds..."
# Start the agent in background
# The 25s timeout is a safety net: the sleeps below add up to 20s, after
# which the agent is stopped explicitly. Because this is a single command
# (no pipeline), $! is the PID of the timeout wrapper itself.
timeout 25s ./target/release/cm-dashboard-agent -v > /tmp/agent_output.log 2>&1 &
AGENT_PID=$!
# Give agent time to start and warm cache
sleep 5
# List listening TCP sockets with ss and look for the ZMQ port
# NOTE(review): the grep is a plain substring match, so "6130" appearing
# anywhere on a line (another port, a PID) would also count - confirm that
# this is acceptable.
echo "Checking ZMQ port 6130:"
ss -tlnp | grep 6130 || echo "ZMQ port not found"
# Monitor for a bit more
sleep 15
# Stop agent
# Only signal the process if it is still alive; failures of kill/wait are
# ignored so an already-exited agent does not abort the script under set -e.
if kill -0 $AGENT_PID 2>/dev/null; then
kill $AGENT_PID 2>/dev/null || true
wait $AGENT_PID 2>/dev/null || true
fi
#######################################
# Print the first matching lines of a log, or a fallback message.
# Arguments:
#   $1 - maximum number of matching lines to print
#   $2 - extended regular expression to search for
#   $3 - file to search
#   $4 - message printed when no line matches
# Outputs: matching lines (or the fallback message) on stdout
#######################################
show_matches() {
  local limit=$1 pattern=$2 file=$3 fallback=$4
  # grep -m caps the output itself, so grep's own exit status decides whether
  # the fallback runs. With "grep ... | head -N || echo ..." the status was
  # head's, which is 0 even on empty input, so the fallback never fired.
  grep -m "$limit" -E -- "$pattern" "$file" || echo "$fallback"
}

echo "Agent output analysis:"
echo "Total lines of output: $(wc -l < /tmp/agent_output.log)"
echo
echo "Cache-related messages:"
show_matches 10 "(cache|Cache|warming|Warming|tier|Tier)" /tmp/agent_output.log "No cache messages found"
echo
echo "Collection messages:"
show_matches 10 "(collection|Collection|collected|Collected)" /tmp/agent_output.log "No collection messages found"
echo
echo "Error messages:"
# Errors are not truncated: every matching line is shown.
grep -E "(error|Error|failed|Failed)" /tmp/agent_output.log || echo "No errors found"
echo
# Test 5: Check tier assignment
printf '%s\n' \
  "Test 5: Cache tier analysis" \
  "Searching for tier assignments in startup:"
# Print every startup log line that names a cache tier; say so if none do.
if ! grep -E "(RealTime|Fast|Medium|Slow|Static)" /tmp/agent_startup.log; then
  echo "No tier information found"
fi
echo
#######################################
# Print the lines of a log that mention a numeric collection interval,
# e.g. "5s intervals" or "15min intervals".
# Arguments: $1 - log file to search
# Returns:   grep's status (non-zero when nothing matches)
#######################################
find_interval_lines() {
  # POSIX ERE has no \d digit class (grep -E does not read it as "a digit"),
  # so the earlier "\d+..." patterns could not match numeric intervals.
  # A digit run followed by a unit/word and " intervals" covers all three of
  # the former alternatives.
  grep -E "[0-9]+[[:alnum:]_]+ intervals" "$1"
}

# Test 6: Collection interval analysis
echo "Test 6: Collection interval verification"
echo "Expected intervals:"
echo "- System (RealTime): 5 seconds"
echo "- Services (Medium): 5 minutes"
echo "- SMART (Slow): 15 minutes"
echo "- Backup (Slow): 15 minutes"
echo
echo "Actual intervals found in logs:"
find_interval_lines /tmp/agent_startup.log || echo "No interval information found"
echo
# Test 7: Manual collector test (if possible)
echo "Test 7: Service discovery test"
echo "Checking what services would be discovered:"
# Only report services when the agent binary has actually been built.
if [[ -f "./target/release/cm-dashboard-agent" ]]; then
  echo "Services that should be monitored:"
  # grep -m 5 limits the output itself, so the pipeline's status is grep's.
  # Piping through "head -5" made head the last stage; head exits 0 even on
  # empty input, so the fallback message could never be printed.
  systemctl list-units --state=active --type=service \
    | grep -m 5 -E "(gitea|immich|postgres|unifi|vaultwarden|nginx|docker|ssh)" \
    || echo "No interesting services found"
fi
echo
# Test 8: Check for threading issues
echo "Test 8: Threading and async analysis"
echo "Looking for async/threading issues:"
# Show at most five matching lines. grep -m keeps grep as the command whose
# status is tested; with "| head -5" the status was head's (always 0), which
# made the fallback message unreachable.
grep -m 5 -E "(tokio|async|await|thread)" /tmp/agent_output.log || echo "No async-related messages"
echo
#######################################
# Print the number of lines matching a pattern as a single integer.
# Arguments: passed straight to "grep -c" (options, pattern, file)
# Outputs:   the match count on stdout; 0 when the file cannot be read
#######################################
count_matches() {
  local total
  # grep -c already prints "0" (and exits 1) when nothing matches, so adding
  # "|| echo 0" to it produced two zeros. Only substitute a default when grep
  # printed nothing at all, e.g. because the file is missing.
  total=$(grep -c "$@" 2>/dev/null) || true
  echo "${total:-0}"
}

echo "=== Test Summary ==="
echo "Agent startup log: /tmp/agent_startup.log"
echo "Agent runtime log: /tmp/agent_output.log"
echo
echo "Key findings:"
echo "1. Agent starts: $([ -f /tmp/agent_startup.log ] && echo "✓" || echo "✗")"
echo "2. Collectors found: $(count_matches "monitoring" /tmp/agent_startup.log)"
echo "3. Cache messages: $(count_matches -i cache /tmp/agent_output.log)"
echo "4. Errors found: $(count_matches -i error /tmp/agent_output.log)"
echo
echo "Next steps if issues found:"
echo "- Check collector initialization in smart_agent.rs"
echo "- Verify cache tier assignments and intervals"
echo "- Debug collection scheduling in collect_tier() method"
echo "- Test individual collectors outside of smart caching"