Fix Status::Inactive ordering to prevent gray title bar

- Reorder Status enum variants to fix aggregation priority
- Status::Inactive now has the same priority as Status::Ok in aggregation
- Prevents inactive services from causing gray title bar
- Title bar stays green when system has only active and inactive services
- Only Unknown/Offline/Pending/Warning/Critical statuses affect title color
Add Status::Inactive for inactive services with empty circle display
2025-11-18 18:03:50 +01:00 · 2025-11-18 17:54:51 +01:00 · 2025-11-18 16:50:33 +01:00 · 2025-11-18 16:40:14 +01:00 · 2025-11-18 16:02:15 +01:00 · 2025-11-15 11:41:58 +01:00
43 changed files with 2124 additions and 3178 deletions
--- a/.gitea/workflows/release.yml
+++ b/.gitea/workflows/release.yml
@@ -0,0 +1,128 @@
+name: Build and Release
+
+on:
+  push:
+    tags:
+      - 'v*'
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'Version to release (e.g., v0.1.0)'
+        required: true
+        default: 'v0.1.0'
+
+jobs:
+  build-and-release:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Rust
+        uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+          profile: minimal
+          override: true
+
+      - name: Install system dependencies
+        run: |
+          apt-get update
+          apt-get install -y pkg-config libssl-dev libzmq3-dev
+
+      - name: Build workspace (static)
+        run: |
+          export RUSTFLAGS="-C target-feature=+crt-static"
+          cargo build --release --workspace --target x86_64-unknown-linux-gnu
+
+      - name: Create release directory
+        run: |
+          mkdir -p release
+          cp target/x86_64-unknown-linux-gnu/release/cm-dashboard release/cm-dashboard-linux-x86_64
+          cp target/x86_64-unknown-linux-gnu/release/cm-dashboard-agent release/cm-dashboard-agent-linux-x86_64
+
+      - name: Create tarball
+        run: |
+          cd release
+          tar -czf cm-dashboard-linux-x86_64.tar.gz cm-dashboard-linux-x86_64 cm-dashboard-agent-linux-x86_64
+
+      - name: Set version variable
+        id: version
+        run: |
+          if [ "${{ gitea.event_name }}" == "workflow_dispatch" ]; then
+            echo "VERSION=${{ gitea.event.inputs.version }}" >> $GITHUB_OUTPUT
+          else
+            echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Create Release with curl
+        env:
+          GITEA_TOKEN: ${{ secrets.GITEATOKEN }}
+        run: |
+          VERSION="${{ steps.version.outputs.VERSION }}"
+          
+          # Create release
+          curl -X POST \
+            -H "Authorization: token $GITEA_TOKEN" \
+            -H "Content-Type: application/json" \
+            -d '{
+              "tag_name": "'$VERSION'",
+              "name": "cm-dashboard '$VERSION'",
+              "body": "## cm-dashboard '$VERSION'\n\nPre-built binaries for Linux x86_64:\n- cm-dashboard-linux-x86_64 - Dashboard TUI binary\n- cm-dashboard-agent-linux-x86_64 - Agent daemon binary\n- cm-dashboard-linux-x86_64.tar.gz - Combined tarball"
+            }' \
+            "https://gitea.cmtec.se/api/v1/repos/cm/cm-dashboard/releases"
+          
+          # Get release ID
+          RELEASE_ID=$(curl -s -H "Authorization: token $GITEA_TOKEN" \
+            "https://gitea.cmtec.se/api/v1/repos/cm/cm-dashboard/releases/tags/$VERSION" | \
+            grep -o '"id":[0-9]*' | head -1 | cut -d':' -f2)
+          
+          # Upload binaries
+          curl -X POST \
+            -H "Authorization: token $GITEA_TOKEN" \
+            -F "attachment=@release/cm-dashboard-linux-x86_64" \
+            "https://gitea.cmtec.se/api/v1/repos/cm/cm-dashboard/releases/$RELEASE_ID/assets?name=cm-dashboard-linux-x86_64"
+          
+          curl -X POST \
+            -H "Authorization: token $GITEA_TOKEN" \
+            -F "attachment=@release/cm-dashboard-agent-linux-x86_64" \
+            "https://gitea.cmtec.se/api/v1/repos/cm/cm-dashboard/releases/$RELEASE_ID/assets?name=cm-dashboard-agent-linux-x86_64"
+          
+          curl -X POST \
+            -H "Authorization: token $GITEA_TOKEN" \
+            -F "attachment=@release/cm-dashboard-linux-x86_64.tar.gz" \
+            "https://gitea.cmtec.se/api/v1/repos/cm/cm-dashboard/releases/$RELEASE_ID/assets?name=cm-dashboard-linux-x86_64.tar.gz"
+
+      - name: Update NixOS Configuration
+        env:
+          GITEA_TOKEN: ${{ secrets.GITEATOKEN }}
+        run: |
+          VERSION="${{ steps.version.outputs.VERSION }}"
+          
+          # Clone nixosbox repository
+          git clone https://$GITEA_TOKEN@gitea.cmtec.se/cm/nixosbox.git nixosbox-update
+          cd nixosbox-update
+          
+          # Get hash for the new release tarball
+          TARBALL_URL="https://gitea.cmtec.se/cm/cm-dashboard/releases/download/$VERSION/cm-dashboard-linux-x86_64.tar.gz"
+          
+          # Download tarball to get correct hash
+          curl -L -o cm-dashboard.tar.gz "$TARBALL_URL"
+          # Convert sha256 hex to base64 for Nix hash format using Python
+          NEW_HASH=$(sha256sum cm-dashboard.tar.gz | cut -d' ' -f1)
+          NIX_HASH="sha256-$(python3 -c "import base64, binascii; print(base64.b64encode(binascii.unhexlify('$NEW_HASH')).decode())")"
+          
+          # Update the NixOS configuration
+          sed -i "s|version = \"v[^\"]*\"|version = \"$VERSION\"|" hosts/services/cm-dashboard.nix
+          sed -i "s|sha256 = \"sha256-[^\"]*\"|sha256 = \"$NIX_HASH\"|" hosts/services/cm-dashboard.nix
+          
+          # Commit and push changes
+          git config user.name "Gitea Actions"
+          git config user.email "actions@gitea.cmtec.se"
+          git add hosts/services/cm-dashboard.nix
+          git commit -m "Auto-update cm-dashboard to $VERSION
+
+          - Update version to $VERSION with automated release
+          - Update tarball hash for new static binaries
+          - Automated update from cm-dashboard release workflow"
+          git push
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,3 +0,0 @@
-# Agent Guide
-
-Agents working in this repo must follow the instructions in `CLAUDE.md`.
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -2,178 +2,80 @@

 ## Overview

-A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure. Built to replace Glance with a custom solution tailored for our specific monitoring needs and ZMQ-based metric collection.
+A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure. Built with ZMQ-based metric collection and individual metrics architecture.

-## Implementation Strategy
+## Current Features

-### Current Implementation Status
+### Core Functionality
+- **Real-time Monitoring**: CPU, RAM, Storage, and Service status
+- **Service Management**: Start/stop services with user-stopped tracking
+- **Multi-host Support**: Monitor multiple servers from single dashboard
+- **NixOS Integration**: System rebuild via SSH + tmux popup
+- **Backup Monitoring**: Borgbackup status and scheduling

-**System Panel Enhancement - COMPLETED** ✅
+### User-Stopped Service Tracking
+- Services stopped via dashboard are marked as "user-stopped"
+- User-stopped services report Status::Ok instead of Warning
+- Prevents false alerts during intentional maintenance
+- Persistent storage survives agent restarts
+- Automatic flag clearing when services are restarted via dashboard

-All system panel features successfully implemented:
- ✅ **NixOS Collector**: Created collector for version and active users  
- ✅ **System Widget**: Unified widget combining NixOS, CPU, RAM, and Storage
- ✅ **Build Display**: Shows NixOS build information without codename
- ✅ **Active Users**: Displays currently logged in users
- ✅ **Tmpfs Monitoring**: Added /tmp usage to RAM section
- ✅ **Agent Deployment**: NixOS collector working in production
-
-**Keyboard Navigation and Service Management - COMPLETED** ✅
-
-All keyboard navigation and service selection features successfully implemented:
- ✅ **Panel Navigation**: Shift+Tab cycles through visible panels only (System → Services → Backup)
- ✅ **Service Selection**: Up/Down arrows navigate through parent services with visual cursor
- ✅ **Focus Management**: Selection highlighting only visible when Services panel focused
- ✅ **Status Preservation**: Service health colors maintained during selection (green/red icons)
- ✅ **Smart Panel Switching**: Only cycles through panels with data (backup panel conditional)
- ✅ **Scroll Support**: All panels support content scrolling with proper overflow indicators
-
-**Current Status - October 24, 2025:**
- All keyboard navigation features working correctly ✅
- Service selection cursor implemented with focus-aware highlighting ✅
- Panel scrolling fixed for System, Services, and Backup panels ✅
- Build display working: "Build: 25.05.20251004.3bcc93c" ✅
- Configuration hash display implemented: "Config: d16f0d0" ✅
-
-**Layout Achieved:**
-```
-NixOS:
-Build: 25.05.20251004.3bcc93c
-Config: d16f0d0  # Shows actual nixosbox config hash
-Active users: cm, simon
-CPU:
-● Load: 0.02 0.31 0.86 • 3000MHz
-RAM:
-● Usage: 33% 2.6GB/7.6GB  
-● /tmp: 0% 0B/2.0GB  
-Storage:  
-● root (Single):  
- ├─ ● nvme0n1 W: 1%
- └─ ● 18% 167.4GB/928.2GB
+### Custom Service Logs
+- Configure service-specific log file paths per host in dashboard config
+- Press `L` on any service to view custom log files via `tail -f`
+- Configuration format in dashboard config:
+```toml
+[service_logs]
+hostname1 = [
+  { service_name = "nginx", log_file_path = "/var/log/nginx/access.log" },
+  { service_name = "app", log_file_path = "/var/log/myapp/app.log" }
+]
+hostname2 = [
+  { service_name = "database", log_file_path = "/var/log/postgres/postgres.log" }
+]
 ```

-**System panel layout fully implemented with blue tree symbols ✅**
-**Tree symbols now use consistent blue theming across all panels ✅**
-**Overflow handling restored for all widgets ("... and X more") ✅**
-**Agent hash display working correctly ✅**
+### Service Management
+- **Direct Control**: Arrow keys (↑↓) or vim keys (j/k) navigate services
+- **Service Actions**: 
+  - `s` - Start service (sends UserStart command)
+  - `S` - Stop service (sends UserStop command)
+  - `J` - Show service logs (journalctl in tmux popup)
+  - `L` - Show custom log files (tail -f custom paths in tmux popup)
+  - `R` - Rebuild current host
+- **Visual Status**: Green ● (active), Yellow ◐ (inactive), Red ◯ (failed)
+- **Transitional Icons**: Blue arrows during operations

-### Current Keyboard Navigation Implementation
-
-**Navigation Controls:**
- **Tab**: Switch between hosts (cmbox, srv01, srv02, steambox, etc.)
- **Shift+Tab**: Cycle through visible panels (System → Services → Backup → System)
- **Up/Down (System/Backup)**: Scroll through panel content
- **Up/Down (Services)**: Move service selection cursor between parent services
+### Navigation
+- **Tab**: Switch between hosts
+- **↑↓ or j/k**: Select services
+- **s**: Start selected service (UserStart)
+- **S**: Stop selected service (UserStop)
+- **J**: Show service logs (journalctl)
+- **L**: Show custom log files
+- **R**: Rebuild current host
+- **B**: Run backup on current host
 - **q**: Quit dashboard

-**Panel-Specific Features:**
- **System Panel**: Scrollable content with CPU, RAM, Storage details
- **Services Panel**: Service selection cursor for parent services only (docker, nginx, postgresql, etc.)
- **Backup Panel**: Scrollable repository list with proper overflow handling
-
-**Visual Feedback:**
- **Focused Panel**: Blue border and title highlighting
- **Service Selection**: Blue background with preserved status icon colors (green ● for active, red ● for failed)
- **Focus-Aware Selection**: Selection highlighting only visible when Services panel focused
- **Dynamic Statusbar**: Context-aware shortcuts based on focused panel
-
-### Remote Command Execution - WORKING ✅
-
-**All Issues Resolved (as of 2025-10-24):**
- ✅ **ZMQ Command Protocol**: Extended with ServiceControl and SystemRebuild variants
- ✅ **Agent Handlers**: systemctl and nixos-rebuild execution with maintenance mode
- ✅ **Dashboard Integration**: Keyboard shortcuts execute commands
- ✅ **Service Control**: Fixed toggle logic - replaced with separate 's' (start) and 'S' (stop)
- ✅ **System Rebuild**: Fixed permission issues and sandboxing problems
- ✅ **Git Clone Approach**: Implemented for nixos-rebuild to avoid directory permissions
- ✅ **Visual Feedback**: Directional arrows for service status (↑ starting, ↓ stopping, ↻ restarting)
-
-**Keyboard Controls Status:**
- **Services Panel**: 
-  - R (restart) ✅ Working
-  - s (start) ✅ Working  
-  - S (stop) ✅ Working
- **System Panel**: R (nixos-rebuild) ✅ Working with --option sandbox false
- **Backup Panel**: B (trigger backup) ❓ Not implemented
-
-**Visual Feedback Implementation - IN PROGRESS:**
-
-Context-appropriate progress indicators for each panel:
-
-**Services Panel** (Service status transitions):
-```
-● nginx          active    →  ⏳ nginx      restarting  →  ● nginx          active
-● docker         active    →  ⏳ docker     stopping    →  ● docker         inactive  
-```
-
-**System Panel** (Build progress in NixOS section):
-```
-NixOS:
-Build: 25.05.20251004.3bcc93c    →    Build: [████████████     ] 65%
-Active users: cm, simon               Active users: cm, simon
-```
-
-**Backup Panel** (OnGoing status with progress):
-```
-Latest backup:              →    Latest backup:
-● 2024-10-23 14:32:15            ● OnGoing  
-└─ Duration: 1.3m                 └─ [██████       ] 60%
-```
-
-**Next Session Priority Tasks:**
-
-**Remaining Features:**
-1. **Command Response Protocol**:
-   - Agent sends command completion/failure back to dashboard via ZMQ
-   - Dashboard updates UI status from ⏳ to ● when commands complete
-   - Clear success/failure status after timeout
-
-2. **Backup Panel Features**:
-   - Implement backup trigger functionality (B key)
-   - Complete visual feedback for backup operations
-   - Add backup progress indicators
-
-**Enhancement Tasks:**
- Add confirmation dialogs for destructive actions (stop/restart/rebuild)
- Implement command history/logging
- Add keyboard shortcuts help overlay
-
-**Future Enhanced Navigation:**
- Add Page Up/Down for faster scrolling through long service lists
- Implement search/filter functionality for services
- Add jump-to-service shortcuts (first letter navigation)
-
-**Future Advanced Features:**
- Service dependency visualization
- Historical service status tracking
- Real-time log viewing integration
-
-## Core Architecture Principles - CRITICAL
+## Core Architecture Principles

 ### Individual Metrics Philosophy
-
-**NEW ARCHITECTURE**: Agent collects individual metrics, dashboard composes widgets from those metrics.
+- Agent collects individual metrics, dashboard composes widgets
+- Each metric collected, transmitted, and stored individually
+- Agent calculates status for each metric using thresholds
+- Dashboard aggregates individual metric statuses for widget status

 ### Maintenance Mode
-
-**Purpose:**
-
- Suppress email notifications during planned maintenance or backups
- Prevents false alerts when services are intentionally stopped
-
-**Implementation:**
-
 - Agent checks for `/tmp/cm-maintenance` file before sending notifications
 - File presence suppresses all email notifications while continuing monitoring
 - Dashboard continues to show real status, only notifications are blocked

-**Usage:**
-
+Usage:
 ```bash
 # Enable maintenance mode
 touch /tmp/cm-maintenance

-# Run maintenance tasks (backups, service restarts, etc.)
+# Run maintenance tasks
 systemctl stop service
 # ... maintenance work ...
 systemctl start service
@@ -182,61 +84,84 @@ systemctl start service
 rm /tmp/cm-maintenance
 ```

-**NixOS Integration:**
+## Development and Deployment Architecture

- Borgbackup script automatically creates/removes maintenance file
- Automatic cleanup via trap ensures maintenance mode doesn't stick
- All cinfiguration are shall be done from nixos config
+### Development Path
+- **Location:** `~/projects/cm-dashboard` 
+- **Purpose:** Development workflow only - for committing new code
+- **Access:** Only for developers to commit changes

-**ARCHITECTURE ENFORCEMENT**:
+### Deployment Path  
+- **Location:** `/var/lib/cm-dashboard/nixos-config`
+- **Purpose:** Production deployment only - agent clones/pulls from git
+- **Workflow:** git pull → `/var/lib/cm-dashboard/nixos-config` → nixos-rebuild

- **ZERO legacy code reuse** - Fresh implementation following ARCHITECT.md exactly
- **Individual metrics only** - NO grouped metric structures
- **Reference-only legacy** - Study old functionality, implement new architecture
- **Clean slate mindset** - Build as if legacy codebase never existed
+### Git Flow
+```
+Development: ~/projects/cm-dashboard → git commit → git push
+Deployment:  git pull → /var/lib/cm-dashboard/nixos-config → rebuild
+```

-**Implementation Rules**:
+## Automated Binary Release System

-1. **Individual Metrics**: Each metric is collected, transmitted, and stored individually
-2. **Agent Status Authority**: Agent calculates status for each metric using thresholds
-3. **Dashboard Composition**: Dashboard widgets subscribe to specific metrics by name
-4. **Status Aggregation**: Dashboard aggregates individual metric statuses for widget status
-   **Testing & Building**:
+CM Dashboard uses automated binary releases instead of source builds.

- **Workspace builds**: `cargo build --workspace` for all testing
- **Clean compilation**: Remove `target/` between architecture changes
- **ZMQ testing**: Test agent-dashboard communication independently
- **Widget testing**: Verify UI layout matches legacy appearance exactly
+### Creating New Releases
+```bash
+cd ~/projects/cm-dashboard
+git tag v0.1.X
+git push origin v0.1.X
+```

-**NEVER in New Implementation**:
+This automatically:
+- Builds static binaries with `RUSTFLAGS="-C target-feature=+crt-static"`
+- Creates GitHub-style release with tarball
+- Uploads binaries via Gitea API

- Copy/paste ANY code from legacy backup
- Calculate status in dashboard widgets
- Hardcode metric names in widgets (use const arrays)
+### NixOS Configuration Updates
+Edit `~/projects/nixosbox/hosts/services/cm-dashboard.nix`:

-# Important Communication Guidelines
+```nix
+version = "v0.1.X";
+src = pkgs.fetchurl {
+  url = "https://gitea.cmtec.se/cm/cm-dashboard/releases/download/${version}/cm-dashboard-linux-x86_64.tar.gz";
+  sha256 = "sha256-NEW_HASH_HERE";
+};
+```

-NEVER write that you have "successfully implemented" something or generate extensive summary text without first verifying with the user that the implementation is correct. This wastes tokens. Keep responses concise.
+### Get Release Hash
+```bash
+cd ~/projects/nixosbox
+nix-build --no-out-link -E 'with import <nixpkgs> {}; fetchurl {
+  url = "https://gitea.cmtec.se/cm/cm-dashboard/releases/download/v0.1.X/cm-dashboard-linux-x86_64.tar.gz";
+  sha256 = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";
+}' 2>&1 | grep "got:"
+```

-NEVER implement code without first getting explicit user agreement on the approach. Always ask for confirmation before proceeding with implementation.
+### Building
+
+**Testing & Building:**
+- **Workspace builds**: `nix-shell -p openssl pkg-config --run "cargo build --workspace"`
+- **Clean compilation**: Remove `target/` between major changes
+
+## Important Communication Guidelines
+
+Keep responses concise and focused. Avoid extensive implementation summaries unless requested.

 ## Commit Message Guidelines

 **NEVER mention:**
-
 - Claude or any AI assistant names
 - Automation or AI-generated content
 - Any reference to automated code generation

 **ALWAYS:**
-
 - Focus purely on technical changes and their purpose
 - Use standard software development commit message format
 - Describe what was changed and why, not how it was created
 - Write from the perspective of a human developer

 **Examples:**
-
 - ❌ "Generated with Claude Code"
 - ❌ "AI-assisted implementation"
 - ❌ "Automated refactoring"
@@ -244,60 +169,22 @@ NEVER implement code without first getting explicit user agreement on the approa
 - ✅ "Restructure storage widget with improved layout"
 - ✅ "Update CPU thresholds to production values"

-## NixOS Configuration Updates
+## Implementation Rules

-When code changes are made to cm-dashboard, the NixOS configuration at `~/nixosbox` must be updated to deploy the changes.
+1. **Individual Metrics**: Each metric is collected, transmitted, and stored individually
+2. **Agent Status Authority**: Agent calculates status for each metric using thresholds
+3. **Dashboard Composition**: Dashboard widgets subscribe to specific metrics by name
+4. **Status Aggregation**: Dashboard aggregates individual metric statuses for widget status

-### Update Process
+**NEVER:**
+- Copy/paste ANY code from legacy implementations
+- Calculate status in dashboard widgets
+- Hardcode metric names in widgets (use const arrays)
+- Create files unless absolutely necessary for achieving goals
+- Create documentation files unless explicitly requested

-1. **Get Latest Commit Hash**
-
-   ```bash
-   git log -1 --format="%H"
-   ```
-
-2. **Update NixOS Configuration**
-   Edit `~/nixosbox/hosts/common/cm-dashboard.nix`:
-
-   ```nix
-   src = pkgs.fetchgit {
-     url = "https://gitea.cmtec.se/cm/cm-dashboard.git";
-     rev = "NEW_COMMIT_HASH_HERE";
-     sha256 = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="; # Placeholder
-   };
-   ```
-
-3. **Get Correct Source Hash**
-   Build with placeholder hash to get the actual hash:
-
-   ```bash
-   cd ~/nixosbox
-   nix-build --no-out-link -E 'with import <nixpkgs> {}; fetchgit {
-     url = "https://gitea.cmtec.se/cm/cm-dashboard.git";
-     rev = "NEW_COMMIT_HASH";
-     sha256 = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";
-   }' 2>&1 | grep "got:"
-   ```
-
-   Example output:
-
-   ```
-   error: hash mismatch in fixed-output derivation '/nix/store/...':
-            specified: sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=
-               got:    sha256-x8crxNusOUYRrkP9mYEOG+Ga3JCPIdJLkEAc5P1ZxdQ=
-   ```
-
-4. **Update Configuration with Correct Hash**
-   Replace the placeholder with the hash from the error message (the "got:" line).
-
-5. **Commit NixOS Configuration**
-
-   ```bash
-   cd ~/nixosbox
-   git add hosts/common/cm-dashboard.nix
-   git commit -m "Update cm-dashboard to latest version (SHORT_HASH)"
-   git push
-   ```
-
-6. **Rebuild System**
-   The user handles the system rebuild step - this cannot be automated.
+**ALWAYS:**
+- Prefer editing existing files to creating new ones
+- Follow existing code conventions and patterns
+- Use existing libraries and utilities
+- Follow security best practices
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -270,7 +270,7 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"

 [[package]]
 name = "cm-dashboard"
-version = "0.1.0"
+version = "0.1.77"
 dependencies = [
 "anyhow",
 "chrono",
@@ -286,12 +286,13 @@ dependencies = [
 "toml",
 "tracing",
 "tracing-subscriber",
+ "wake-on-lan",
 "zmq",
 ]

 [[package]]
 name = "cm-dashboard-agent"
-version = "0.1.0"
+version = "0.1.77"
 dependencies = [
 "anyhow",
 "async-trait",
@@ -314,7 +315,7 @@ dependencies = [

 [[package]]
 name = "cm-dashboard-shared"
-version = "0.1.0"
+version = "0.1.77"
 dependencies = [
 "chrono",
 "serde",
@@ -2064,6 +2065,12 @@ version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"

+[[package]]
+name = "wake-on-lan"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ccf60b60ad7e5b1b37372c5134cbcab4db0706c231d212e0c643a077462bc8f"
+
 [[package]]
 name = "walkdir"
 version = "2.5.0"
--- a/README.md
+++ b/README.md
@@ -1,88 +1,108 @@
 # CM Dashboard

-A real-time infrastructure monitoring system with intelligent status aggregation and email notifications, built with Rust and ZMQ.
+A high-performance Rust-based TUI dashboard for monitoring CMTEC infrastructure. Built with ZMQ-based metric collection and individual metrics architecture.

-## Current Implementation
+## Features

-This is a complete rewrite implementing an **individual metrics architecture** where:
+### Core Monitoring
+- **Real-time metrics**: CPU, RAM, Storage, and Service status
+- **Multi-host support**: Monitor multiple servers from single dashboard  
+- **Service management**: Start/stop services with intelligent status tracking
+- **NixOS integration**: System rebuild via SSH + tmux popup
+- **Backup monitoring**: Borgbackup status and scheduling
+- **Email notifications**: Intelligent batching prevents spam

- **Agent** collects individual metrics (e.g., `cpu_load_1min`, `memory_usage_percent`) and calculates status
- **Dashboard** subscribes to specific metrics and composes widgets
- **Status Aggregation** provides intelligent email notifications with batching
- **Persistent Cache** prevents false notifications on restart
+### User-Stopped Service Tracking
+Services stopped via the dashboard are intelligently tracked to prevent false alerts:

-## Dashboard Interface
+- **Smart status reporting**: User-stopped services show as Status::Ok instead of Warning
+- **Persistent storage**: Tracking survives agent restarts via JSON storage
+- **Automatic management**: Flags are cleared when services are restarted via the dashboard
+- **Maintenance friendly**: No false alerts during intentional service operations
+
+## Architecture
+
+### Individual Metrics Philosophy
+- **Agent**: Collects individual metrics, calculates status using thresholds
+- **Dashboard**: Subscribes to specific metrics, composes widgets from individual data
+- **ZMQ Communication**: Efficient real-time metric transmission
+- **Status Aggregation**: Host-level status calculated from all service metrics
+
+### Components
+
+```
+┌─────────────────┐    ZMQ     ┌─────────────────┐
+│                 │◄──────────►│                 │
+│   Agent         │  Metrics   │   Dashboard     │
+│   - Collectors  │            │   - TUI         │
+│   - Status      │            │   - Widgets     │
+│   - Tracking    │            │   - Commands    │
+│                 │            │                 │
+└─────────────────┘            └─────────────────┘
+         │                              │
+         ▼                              ▼
+┌─────────────────┐            ┌─────────────────┐
+│ JSON Storage    │            │ SSH + tmux      │
+│ - User-stopped  │            │ - Remote rebuild│
+│ - Cache         │            │ - Process       │
+│ - State         │            │   isolation     │
+└─────────────────┘            └─────────────────┘
+```
+
+### Service Control Flow
+
+1. **User Action**: Dashboard sends `UserStart`/`UserStop` commands
+2. **Agent Processing**: 
+   - Marks service as user-stopped (if stopping)
+   - Executes `systemctl start/stop service`
+   - Syncs state to global tracker
+3. **Status Calculation**: 
+   - Systemd collector checks user-stopped flag
+   - Reports Status::Ok for user-stopped inactive services
+   - Normal Warning status for system failures
+
+## Interface

 ```
 cm-dashboard • ● cmbox ● srv01 ● srv02 ● steambox
 ┌system──────────────────────────────┐┌services─────────────────────────────────────────┐
-│CPU:                                ││Service:                  Status:  RAM:   Disk:  │
-│● Load: 0.10 0.52 0.88 • 400.0 MHz  ││● docker                  active   27M    496MB  │
-│RAM:                                ││● docker-registry         active   19M    496MB  │
-│● Used: 30% 2.3GB/7.6GB             ││● gitea                   active   579M   2.6GB  │
-│● tmp: 0.0% 0B/2.0GB                ││● gitea-runner-default    active   11M    2.6GB  │
-│Disk nvme0n1:                       ││● haasp-core              active   9M     1MB    │
-│● Health: PASSED                    ││● haasp-mqtt              active   3M     1MB    │
-│● Usage @root: 8.3% • 75.4/906.2 GB ││● haasp-webgrid           active   10M    1MB    │
-│● Usage @boot: 5.9% • 0.1/1.0 GB    ││● immich-server           active   240M   45.1GB │
-│                                    ││● mosquitto               active   1M     1MB    │
-│                                    ││● mysql                   active   38M    225MB  │
-│                                    ││● nginx                   active   28M    24MB   │
-│                                    ││  ├─ ● gitea.cmtec.se     51ms                   │
-│                                    ││  ├─ ● haasp.cmtec.se     43ms                   │
-│                                    ││  ├─ ● haasp.net          43ms                   │
-│                                    ││  ├─ ● pages.cmtec.se     45ms                   │
-└────────────────────────────────────┘│  ├─ ● photos.cmtec.se    41ms                   │
-┌backup──────────────────────────────┐│  ├─ ● unifi.cmtec.se     46ms                   │
-│Latest backup:                      ││  ├─ ● vault.cmtec.se     47ms                   │
-│● Status: OK                        ││  ├─ ● www.kryddorten.se  81ms                   │
-│Duration: 54s • Last: 4h ago        ││  ├─ ● www.mariehall2.se  86ms                   │
-│Disk usage: 48.2GB/915.8GB          ││● postgresql              active   112M   357MB  │
-│P/N: Samsung SSD 870 QVO 1TB        ││● redis-immich            active   8M     45.1GB │
-│S/N: S5RRNF0W800639Y                ││● sshd                    active   2M     0      │
-│● gitea 2 archives 2.7GB            ││● unifi                   active   594M   495MB  │
-│● immich 2 archives 45.0GB          ││● vaultwarden             active   12M    1MB    │
-│● kryddorten 2 archives 67.6MB      ││                                                 │
-│● mariehall2 2 archives 321.8MB     ││                                                 │
-│● nixosbox 2 archives 4.5MB         ││                                                 │
-│● unifi 2 archives 2.9MB            ││                                                 │
-│● vaultwarden 2 archives 305kB      ││                                                 │
+│NixOS:                              ││Service:                  Status:  RAM:   Disk:  │
+│Build: 25.05.20251004.3bcc93c       ││● docker                  active   27M    496MB  │
+│Agent: v0.1.43                      ││● gitea                   active   579M   2.6GB  │
+│Active users: cm, simon             ││● nginx                   active   28M    24MB   │
+│CPU:                                ││  ├─ ● gitea.cmtec.se     51ms                   │
+│● Load: 0.10 0.52 0.88 • 3000MHz    ││  ├─ ● photos.cmtec.se    41ms                   │
+│RAM:                                ││● postgresql              active   112M   357MB  │
+│● Usage: 33% 2.6GB/7.6GB            ││● redis-immich            user-stopped           │
+│● /tmp: 0% 0B/2.0GB                 ││● sshd                    active   2M     0      │
+│Storage:                            ││● unifi                   active   594M   495MB  │
+│● root (Single):                    ││                                                 │
+│ ├─ ● nvme0n1 W: 1%                 ││                                                 │
+│ └─ ● 18% 167.4GB/928.2GB           ││                                                 │
 └────────────────────────────────────┘└─────────────────────────────────────────────────┘
 ```

-**Navigation**: `←→` switch hosts, `r` refresh, `q` quit
+### Navigation
+- **Tab**: Switch between hosts
+- **↑↓ or j/k**: Navigate services
+- **s**: Start selected service (UserStart)  
+- **S**: Stop selected service (UserStop)
+- **J**: Show service logs (journalctl in tmux popup)
+- **L**: Show custom log files (tail -f custom paths in tmux popup)
+- **R**: Rebuild current host
+- **B**: Run backup on current host
+- **q**: Quit

-## Features
-
- **Real-time monitoring** - Dashboard updates every 1-2 seconds
- **Individual metric collection** - Granular data for flexible dashboard composition
- **Intelligent status aggregation** - Host-level status calculated from all services
- **Smart email notifications** - Batched, detailed alerts with service groupings
- **Persistent state** - Prevents false notifications on restarts
- **ZMQ communication** - Efficient agent-to-dashboard messaging
- **Clean TUI** - Terminal-based dashboard with color-coded status indicators
-
-## Architecture
-
-### Core Components
-
- **Agent** (`cm-dashboard-agent`) - Collects metrics and sends via ZMQ
- **Dashboard** (`cm-dashboard`) - Real-time TUI display consuming metrics
- **Shared** (`cm-dashboard-shared`) - Common types and protocol
- **Status Aggregation** - Intelligent batching and notification management
- **Persistent Cache** - Maintains state across restarts
-
-### Status Levels
-
- **🟢 Ok** - Service running normally
- **🔵 Pending** - Service starting/stopping/reloading
- **🟡 Warning** - Service issues (high load, memory, disk usage)
- **🔴 Critical** - Service failed or critical thresholds exceeded
- **❓ Unknown** - Service state cannot be determined
+### Status Indicators
+- **Green ●**: Active service
+- **Yellow ◐**: Inactive service (system issue)
+- **Red ◯**: Failed service
+- **Blue arrows**: Service transitioning (↑ starting, ↓ stopping, ↻ restarting)
+- **"user-stopped"**: Service stopped via dashboard (Status::Ok)

 ## Quick Start

-### Build
+### Building

 ```bash
 # With Nix (recommended)
@@ -93,21 +113,20 @@ sudo apt install libssl-dev pkg-config  # Ubuntu/Debian
 cargo build --workspace
 ```

-### Run
+### Running

 ```bash
-# Start agent (requires configuration file)
+# Start agent (requires configuration)
 ./target/debug/cm-dashboard-agent --config /etc/cm-dashboard/agent.toml

-# Start dashboard
-./target/debug/cm-dashboard --config /path/to/dashboard.toml
+# Start dashboard (inside tmux session)
+tmux
+./target/debug/cm-dashboard --config /etc/cm-dashboard/dashboard.toml
 ```

 ## Configuration

-### Agent Configuration (`agent.toml`)
-
-The agent requires a comprehensive TOML configuration file:
+### Agent Configuration

 ```toml
 collection_interval_seconds = 2
@@ -116,47 +135,27 @@ collection_interval_seconds = 2
 publisher_port = 6130
 command_port = 6131
 bind_address = "0.0.0.0"
-timeout_ms = 5000
-heartbeat_interval_ms = 30000
+transmission_interval_seconds = 2

 [collectors.cpu]
 enabled = true
 interval_seconds = 2
-load_warning_threshold = 9.0
+load_warning_threshold = 5.0
 load_critical_threshold = 10.0
-temperature_warning_threshold = 100.0
-temperature_critical_threshold = 110.0

 [collectors.memory]
 enabled = true
 interval_seconds = 2
 usage_warning_percent = 80.0
-usage_critical_percent = 95.0
-
-[collectors.disk]
-enabled = true
-interval_seconds = 300
-usage_warning_percent = 80.0
 usage_critical_percent = 90.0

-[[collectors.disk.filesystems]]
-name = "root"
-uuid = "4cade5ce-85a5-4a03-83c8-dfd1d3888d79"
-mount_point = "/"
-fs_type = "ext4"
-monitor = true
-
 [collectors.systemd]
 enabled = true
 interval_seconds = 10
-memory_warning_mb = 1000.0
-memory_critical_mb = 2000.0
-service_name_filters = [
-  "nginx", "postgresql", "redis", "docker", "sshd"
-]
-excluded_services = [
-  "nginx-config-reload", "sshd-keygen"
-]
+service_name_filters = ["nginx*", "postgresql*", "docker*", "sshd*"]
+excluded_services = ["nginx-config-reload", "systemd-", "getty@"]
+nginx_latency_critical_ms = 1000.0
+http_timeout_seconds = 10

 [notifications]
 enabled = true
@@ -164,251 +163,203 @@ smtp_host = "localhost"
 smtp_port = 25
 from_email = "{hostname}@example.com"
 to_email = "admin@example.com"
-rate_limit_minutes = 0
-trigger_on_warnings = true
-trigger_on_failures = true
-recovery_requires_all_ok = true
-suppress_individual_recoveries = true
-
-[status_aggregation]
-enabled = true
-aggregation_method = "worst_case"
-notification_interval_seconds = 30
-
-[cache]
-persist_path = "/var/lib/cm-dashboard/cache.json"
+aggregation_interval_seconds = 30
 ```

-### Dashboard Configuration (`dashboard.toml`)
+### Dashboard Configuration

 ```toml
 [zmq]
-hosts = [
-  { name = "server1", address = "192.168.1.100", port = 6130 },
-  { name = "server2", address = "192.168.1.101", port = 6130 }
-]
-connection_timeout_ms = 5000
-reconnect_interval_ms = 10000
+subscriber_ports = [6130]

-[ui]
-refresh_interval_ms = 1000
-theme = "dark"
+[hosts]
+predefined_hosts = ["cmbox", "srv01", "srv02"]
+
+[ssh]
+rebuild_user = "cm"
+rebuild_alias = "nixos-rebuild-cmtec"
+backup_alias = "cm-backup-run"
 ```

-## Collectors
+## Technical Implementation

-The agent implements several specialized collectors:
+### Collectors

-### CPU Collector (`cpu.rs`)
+#### Systemd Collector
+- **Service Discovery**: Uses `systemctl list-unit-files` + `list-units --all`
+- **Status Calculation**: Checks user-stopped flag before assigning Warning status
+- **Memory Tracking**: Per-service memory usage via `systemctl show`
+- **Sub-services**: Nginx site latency, Docker containers
+- **User-stopped Integration**: `UserStoppedServiceTracker::is_service_user_stopped()`

- Load average (1, 5, 15 minute)
- CPU temperature monitoring
- Real-time process monitoring (top CPU consumers)
- Status calculation with configurable thresholds
+#### User-Stopped Service Tracker
+- **Storage**: `/var/lib/cm-dashboard/user-stopped-services.json`
+- **Thread Safety**: Global singleton with `Arc<Mutex<>>`
+- **Persistence**: Automatic save on state changes
+- **Global Access**: Static methods for collector integration

-### Memory Collector (`memory.rs`)
+#### Other Collectors
+- **CPU**: Load average, temperature, frequency monitoring
+- **Memory**: RAM/swap usage, tmpfs monitoring  
+- **Disk**: Filesystem usage, SMART health data
+- **NixOS**: Build version, active users, agent version
+- **Backup**: Borgbackup repository status and metrics

- RAM usage (total, used, available)
- Swap monitoring
- Real-time process monitoring (top RAM consumers)
- Memory pressure detection
+### ZMQ Protocol

-### Disk Collector (`disk.rs`)
+```rust
+// Metric Message
+#[derive(Serialize, Deserialize)]
+pub struct MetricMessage {
+    pub hostname: String,
+    pub timestamp: u64,
+    pub metrics: Vec<Metric>,
+}

- Filesystem usage per mount point
- SMART health monitoring
- Temperature and wear tracking
- Configurable filesystem monitoring
+// Service Commands
+pub enum AgentCommand {
+    ServiceControl {
+        service_name: String,
+        action: ServiceAction,
+    },
+    SystemRebuild { /* SSH config */ },
+    CollectNow,
+}

-### Systemd Collector (`systemd.rs`)
+pub enum ServiceAction {
+    Start,           // System-initiated
+    Stop,            // System-initiated  
+    UserStart,       // User via dashboard (clears user-stopped)
+    UserStop,        // User via dashboard (marks user-stopped)
+    Status,
+}
+```

- Service status monitoring (`active`, `inactive`, `failed`)
- Memory usage per service
- Service filtering and exclusions
- Handles transitional states (`Status::Pending`)
+### Maintenance Mode

-### Backup Collector (`backup.rs`)
+Suppress notifications during planned maintenance:

- Reads TOML status files from backup systems
- Archive age verification
- Disk usage tracking
- Repository health monitoring
+```bash
+# Enable maintenance mode
+touch /tmp/cm-maintenance
+
+# Perform maintenance
+systemctl stop service
+# ... work ...
+systemctl start service  
+
+# Disable maintenance mode
+rm /tmp/cm-maintenance
+```

 ## Email Notifications

 ### Intelligent Batching
+- **Real-time dashboard**: Immediate status updates
+- **Batched emails**: Aggregated every 30 seconds
+- **Smart grouping**: Services organized by severity
+- **Recovery suppression**: Reduces notification spam

-The system implements smart notification batching to prevent email spam:
-
- **Real-time dashboard updates** - Status changes appear immediately
- **Batched email notifications** - Aggregated every 30 seconds
- **Detailed groupings** - Services organized by severity
-
-### Example Alert Email
-
+### Example Alert
 ```
-Subject: Status Alert: 2 critical, 1 warning, 15 started
+Subject: Status Alert: 1 critical, 2 warnings, 0 recoveries

 Status Summary (30s duration)
 Host Status: Ok → Warning

-🔴 CRITICAL ISSUES (2):
-  postgresql: Ok → Critical
-  nginx: Warning → Critical
+🔴 CRITICAL ISSUES (1):
+  postgresql: Ok → Critical (memory usage 95%)

-🟡 WARNINGS (1):
-  redis: Ok → Warning (memory usage 85%)
+🟡 WARNINGS (2):
+  nginx: Ok → Warning (high load 8.5)
+  redis: user-stopped → Warning (restarted by system)

 ✅ RECOVERIES (0):

-🟢 SERVICE STARTUPS (15):
-  docker: Unknown → Ok
-  sshd: Unknown → Ok
-  ...
-
 --
-CM Dashboard Agent
-Generated at 2025-10-21 19:42:42 CET
+CM Dashboard Agent v0.1.43
 ```

-## Individual Metrics Architecture
-
-The system follows a **metrics-first architecture**:
-
-### Agent Side
-
-```rust
-// Agent collects individual metrics
-vec![
-    Metric::new("cpu_load_1min".to_string(), MetricValue::Float(2.5), Status::Ok),
-    Metric::new("memory_usage_percent".to_string(), MetricValue::Float(78.5), Status::Warning),
-    Metric::new("service_nginx_status".to_string(), MetricValue::String("active".to_string()), Status::Ok),
-]
-```
-
-### Dashboard Side
-
-```rust
-// Widgets subscribe to specific metrics
-impl Widget for CpuWidget {
-    fn update_from_metrics(&mut self, metrics: &[&Metric]) {
-        for metric in metrics {
-            match metric.name.as_str() {
-                "cpu_load_1min" => self.load_1min = metric.value.as_f32(),
-                "cpu_load_5min" => self.load_5min = metric.value.as_f32(),
-                "cpu_temperature_celsius" => self.temperature = metric.value.as_f32(),
-                _ => {}
-            }
-        }
-    }
-}
-```
-
-## Persistent Cache
-
-The cache system prevents false notifications:
-
- **Automatic saving** - Saves when service status changes
- **Persistent storage** - Maintains state across agent restarts
- **Simple design** - No complex TTL or cleanup logic
- **Status preservation** - Prevents duplicate notifications
-
 ## Development

 ### Project Structure
-
 ```
 cm-dashboard/
-├── agent/                  # Metrics collection agent
+├── agent/                     # Metrics collection agent
 │   ├── src/
-│   │   ├── collectors/     # CPU, memory, disk, systemd, backup
-│   │   ├── status/         # Status aggregation and notifications
-│   │   ├── cache/          # Persistent metric caching
-│   │   ├── config/         # TOML configuration loading
-│   │   └── notifications/  # Email notification system
-├── dashboard/              # TUI dashboard application
+│   │   ├── collectors/        # CPU, memory, disk, systemd, backup, nixos
+│   │   ├── service_tracker.rs # User-stopped service tracking
+│   │   ├── status/            # Status aggregation and notifications
+│   │   ├── config/            # TOML configuration loading
+│   │   └── communication/     # ZMQ message handling
+├── dashboard/                 # TUI dashboard application  
 │   ├── src/
-│   │   ├── ui/widgets/     # CPU, memory, services, backup widgets
-│   │   ├── metrics/        # Metric storage and filtering
-│   │   └── communication/  # ZMQ metric consumption
-├── shared/                 # Shared types and utilities
+│   │   ├── ui/widgets/        # CPU, memory, services, backup, system
+│   │   ├── communication/     # ZMQ consumption and commands
+│   │   └── app.rs            # Main application loop
+├── shared/                    # Shared types and utilities
 │   └── src/
-│       ├── metrics.rs      # Metric, Status, and Value types
-│       ├── protocol.rs     # ZMQ message format
-│       └── cache.rs        # Cache configuration
-└── README.md              # This file
+│       ├── metrics.rs         # Metric, Status, StatusTracker types
+│       ├── protocol.rs        # ZMQ message format
+│       └── cache.rs           # Cache configuration
+└── CLAUDE.md                  # Development guidelines and rules
 ```

-### Building
-
+### Testing
 ```bash
-# Debug build
-cargo build --workspace
+# Build and test
+nix-shell -p openssl pkg-config --run "cargo build --workspace"
+nix-shell -p openssl pkg-config --run "cargo test --workspace"

-# Release build
-cargo build --workspace --release
-
-# Run tests
-cargo test --workspace
-
-# Check code formatting
-cargo fmt --all -- --check
-
-# Run clippy linter
+# Code quality
+cargo fmt --all
 cargo clippy --workspace -- -D warnings
 ```

-### Dependencies
+## Deployment

- **tokio** - Async runtime
- **zmq** - Message passing between agent and dashboard
- **ratatui** - Terminal user interface
- **serde** - Serialization for metrics and config
- **anyhow/thiserror** - Error handling
- **tracing** - Structured logging
- **lettre** - SMTP email notifications
- **clap** - Command-line argument parsing
- **toml** - Configuration file parsing
+### Automated Binary Releases
+```bash
+# Create new release
+cd ~/projects/cm-dashboard
+git tag v0.1.X
+git push origin v0.1.X
+```

-## NixOS Integration
+Pushing the tag triggers an automated pipeline that performs:
+- Static binary compilation with `RUSTFLAGS="-C target-feature=+crt-static"`
+- GitHub-style release creation
+- Tarball upload to Gitea

-This project is designed for declarative deployment via NixOS:
-
-### Configuration Generation
-
-The NixOS module automatically generates the agent configuration:
+### NixOS Integration
+Update `~/projects/nixosbox/hosts/services/cm-dashboard.nix`:

 ```nix
-# hosts/common/cm-dashboard.nix
-services.cm-dashboard-agent = {
-  enable = true;
-  port = 6130;
+version = "v0.1.43";
+src = pkgs.fetchurl {
+  url = "https://gitea.cmtec.se/cm/cm-dashboard/releases/download/${version}/cm-dashboard-linux-x86_64.tar.gz";
+  sha256 = "sha256-HASH";
 };
 ```

-### Deployment
-
+Get hash via:
 ```bash
-# Update NixOS configuration
-git add hosts/common/cm-dashboard.nix
-git commit -m "Update cm-dashboard configuration"
-git push
-
-# Rebuild system (user-performed)
-sudo nixos-rebuild switch --flake .
+cd ~/projects/nixosbox
+nix-build --no-out-link -E 'with import <nixpkgs> {}; fetchurl {
+  url = "URL_HERE";
+  sha256 = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";
+}' 2>&1 | grep "got:"
 ```

 ## Monitoring Intervals

- **CPU/Memory**: 2 seconds (real-time monitoring)
- **Disk usage**: 300 seconds (5 minutes)
- **Systemd services**: 10 seconds
- **SMART health**: 600 seconds (10 minutes)
- **Backup status**: 60 seconds (1 minute)
- **Email notifications**: 30 seconds (batched)
- **Dashboard updates**: 1 second (real-time display)
+- **Metrics Collection**: 2 seconds (CPU, memory, services)
+- **Metric Transmission**: 2 seconds (ZMQ publish)
+- **Dashboard Updates**: 1 second (UI refresh)
+- **Email Notifications**: 30 seconds (batched)
+- **Disk Monitoring**: 300 seconds (5 minutes)
+- **Service Discovery**: 300 seconds (5-minute cache)

 ## License

-MIT License - see LICENSE file for details
-
+MIT License - see LICENSE file for details.
--- a/TODO.md
+++ b/TODO.md
@@ -1,63 +0,0 @@
-# TODO
-
-## Systemd filtering (agent)
-
- remove user systemd collection
- reduce number of systemctl call
- Cahnge so only services in include list are detected
- Filter on exact name
- Add support for "\*" in filtering
-
-## System panel (agent/dashboard)
-
-use following layout:
-'''
-NixOS:
-Build: xxxxxx
-Agen: xxxxxx
-CPU:
-● Load: 0.02 0.31 0.86
-└─ Freq: 3000MHz
-RAM:
-● Usage: 33% 2.6GB/7.6GB  
- └─ ● /tmp: 0% 0B/2.0GB
-Storage:
-● /:  
- ├─ ● nvme0n1 T: 40C • W: 4%  
- └─ ● 8% 75.0GB/906.2GB
-'''
-
- Add support to show login/active users
- Add support to show timestamp/version for latest nixos rebuild
-
-## Backup panel (dashboard)
-
-use following layout:
-'''
-Latest backup:  
-● <timestamp>
-└─ Duration: 1.3m
-Disk:
-● Samsung SSD 870 QVO 1TB  
- ├─ S/N: S5RRNF0W800639Y
-└─ Usage: 50.5GB/915.8GB
-Repos:
-● gitea (4) 5.1GB  
-● immich (4) 45.0GB  
-● kryddorten (4) 67.8MB  
-● mariehall2 (4) 322.7MB
-● nixosbox (4) 5.5MB  
-● unifi (4) 5.7MB  
-● vaultwarden (4) 508kB
-'''
-
-## Keyboard navigation and scrolling (dashboard)
-
- Add keyboard navigation between panels "Shift-Tab"
- Add lower statusbar with dynamic updated shortcuts when switchng between panels
-
-## Remote execution (agent/dashboard)
-
- Add support for send command via dashboard to agent to do nixos rebuid
- Add support for navigating services in dashboard and trigger start/stop/restart
- Add support for trigger backup
--- a/agent/Cargo.toml
+++ b/agent/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-agent"
-version = "0.1.0"
+version = "0.1.78"
 edition = "2021"

 [dependencies]
--- a/agent/src/agent.rs
+++ b/agent/src/agent.rs
@@ -4,12 +4,13 @@ use std::time::Duration;
 use tokio::time::interval;
 use tracing::{debug, error, info};

-use crate::communication::{AgentCommand, ServiceAction, ZmqHandler};
+use crate::communication::{AgentCommand, ZmqHandler};
 use crate::config::AgentConfig;
 use crate::metrics::MetricCollectionManager;
 use crate::notifications::NotificationManager;
+use crate::service_tracker::UserStoppedServiceTracker;
 use crate::status::HostStatusManager;
-use cm_dashboard_shared::{Metric, MetricMessage};
+use cm_dashboard_shared::{Metric, MetricMessage, MetricValue, Status};

 pub struct Agent {
    hostname: String,
@@ -18,6 +19,7 @@ pub struct Agent {
    metric_manager: MetricCollectionManager,
    notification_manager: NotificationManager,
    host_status_manager: HostStatusManager,
+    service_tracker: UserStoppedServiceTracker,
 }

 impl Agent {
@@ -50,6 +52,10 @@ impl Agent {
        let host_status_manager = HostStatusManager::new(config.status_aggregation.clone());
        info!("Host status manager initialized");

+        // Initialize user-stopped service tracker
+        let service_tracker = UserStoppedServiceTracker::init_global()?;
+        info!("User-stopped service tracker initialized");
+
        Ok(Self {
            hostname,
            config,
@@ -57,6 +63,7 @@ impl Agent {
            metric_manager,
            notification_manager,
            host_status_manager,
+            service_tracker,
        })
    }

@@ -71,11 +78,12 @@ impl Agent {
            info!("Initial metric collection completed - all data cached and ready");
        }

-        // Separate intervals for collection and transmission
+        // Separate intervals for collection, transmission, heartbeat, and email notifications
        let mut collection_interval =
            interval(Duration::from_secs(self.config.collection_interval_seconds));
-        let mut transmission_interval = interval(Duration::from_secs(1)); // ZMQ broadcast every 1 second
-        let mut notification_interval = interval(Duration::from_secs(self.config.status_aggregation.notification_interval_seconds));
+        let mut transmission_interval = interval(Duration::from_secs(self.config.zmq.transmission_interval_seconds));
+        let mut heartbeat_interval = interval(Duration::from_secs(self.config.zmq.heartbeat_interval_seconds));
+        let mut notification_interval = interval(Duration::from_secs(self.config.notifications.aggregation_interval_seconds));

        loop {
            tokio::select! {
@@ -86,13 +94,19 @@ impl Agent {
                    }
                }
                _ = transmission_interval.tick() => {
-                    // Send all cached metrics via ZMQ every 1 second
-                    if let Err(e) = self.broadcast_all_cached_metrics().await {
-                        error!("Failed to broadcast cached metrics: {}", e);
+                    // Send all metrics via ZMQ (dashboard updates only)
+                    if let Err(e) = self.broadcast_all_metrics().await {
+                        error!("Failed to broadcast metrics: {}", e);
+                    }
+                }
+                _ = heartbeat_interval.tick() => {
+                    // Send standalone heartbeat for host connectivity detection
+                    if let Err(e) = self.send_heartbeat().await {
+                        error!("Failed to send heartbeat: {}", e);
                    }
                }
                _ = notification_interval.tick() => {
-                    // Process batched notifications
+                    // Process batched email notifications (separate from dashboard updates)
                    if let Err(e) = self.host_status_manager.process_pending_notifications(&mut self.notification_manager).await {
                        error!("Failed to process pending notifications: {}", e);
                    }
@@ -127,8 +141,8 @@ impl Agent {

        info!("Force collected and cached {} metrics", metrics.len());

-        // Process metrics through status manager
-        self.process_metrics(&metrics).await;
+        // Process metrics through status manager (collect status data at startup)
+        let _status_changed = self.process_metrics(&metrics).await;

        Ok(())
    }
@@ -146,41 +160,116 @@ impl Agent {

        debug!("Collected and cached {} metrics", metrics.len());

-        // Process metrics through status manager
-        self.process_metrics(&metrics).await;
+        // Process metrics through status manager and trigger immediate transmission if status changed
+        let status_changed = self.process_metrics(&metrics).await;
+        
+        if status_changed {
+            info!("Status change detected - triggering immediate metric transmission");
+            if let Err(e) = self.broadcast_all_metrics().await {
+                error!("Failed to broadcast metrics after status change: {}", e);
+            }
+        }

        Ok(())
    }

-    async fn broadcast_all_cached_metrics(&mut self) -> Result<()> {
-        debug!("Broadcasting all cached metrics via ZMQ");
+    async fn broadcast_all_metrics(&mut self) -> Result<()> {
+        debug!("Broadcasting cached metrics via ZMQ");

-        // Get all cached metrics from the metric manager
-        let mut cached_metrics = self.metric_manager.get_all_cached_metrics().await?;
+        // Get cached metrics (no fresh collection)
+        let mut metrics = self.metric_manager.get_cached_metrics();

        // Add the host status summary metric from status manager
        let host_status_metric = self.host_status_manager.get_host_status_metric();
-        cached_metrics.push(host_status_metric);
+        metrics.push(host_status_metric);

-        if cached_metrics.is_empty() {
-            debug!("No cached metrics to broadcast");
+        // Add agent version metric for cross-host version comparison
+        let version_metric = self.get_agent_version_metric();
+        metrics.push(version_metric);
+
+        // Add heartbeat metric for host connectivity detection
+        let heartbeat_metric = self.get_heartbeat_metric();
+        metrics.push(heartbeat_metric);
+
+        // Check for user-stopped services that are now active and clear their flags
+        self.clear_user_stopped_flags_for_active_services(&metrics);
+
+        if metrics.is_empty() {
+            debug!("No metrics to broadcast");
            return Ok(());
        }

-        debug!("Broadcasting {} cached metrics (including host status summary)", cached_metrics.len());
+        debug!("Broadcasting {} cached metrics (including host status summary)", metrics.len());

-        // Create and send message with all cached data
-        let message = MetricMessage::new(self.hostname.clone(), cached_metrics);
+        // Create and send message with all current data
+        let message = MetricMessage::new(self.hostname.clone(), metrics);
        self.zmq_handler.publish_metrics(&message).await?;

-        debug!("Cached metrics broadcasted successfully");
+        debug!("Metrics broadcasted successfully");
        Ok(())
    }

-    async fn process_metrics(&mut self, metrics: &[Metric]) {
+    async fn process_metrics(&mut self, metrics: &[Metric]) -> bool {
+        let mut status_changed = false;
        for metric in metrics {
-            self.host_status_manager.process_metric(metric, &mut self.notification_manager, self.metric_manager.get_cache_manager()).await;
+            // Filter excluded metrics from email notification processing only
+            if self.config.notifications.exclude_email_metrics.contains(&metric.name) {
+                debug!("Excluding metric '{}' from email notification processing", metric.name);
+                continue;
+            }
+            
+            if self.host_status_manager.process_metric(metric, &mut self.notification_manager).await {
+                status_changed = true;
+            }
        }
+        status_changed
+    }
+
+    /// Create agent version metric for cross-host version comparison
+    fn get_agent_version_metric(&self) -> Metric {
+        // Version is taken from the Cargo package version via get_agent_version()
+        let version = self.get_agent_version();
+        
+        Metric::new(
+            "agent_version".to_string(),
+            MetricValue::String(version),
+            Status::Ok,
+        )
+    }
+
+    /// Get agent version from Cargo package version
+    fn get_agent_version(&self) -> String {
+        // Use the version from Cargo.toml (e.g., "0.1.11")
+        format!("v{}", env!("CARGO_PKG_VERSION"))
+    }
+
+    /// Create heartbeat metric for host connectivity detection
+    fn get_heartbeat_metric(&self) -> Metric {
+        use std::time::{SystemTime, UNIX_EPOCH};
+        
+        let timestamp = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap()
+            .as_secs();
+        
+        Metric::new(
+            "agent_heartbeat".to_string(),
+            MetricValue::Integer(timestamp as i64),
+            Status::Ok,
+        )
+    }
+
+    /// Send standalone heartbeat for connectivity detection
+    async fn send_heartbeat(&mut self) -> Result<()> {
+        let heartbeat_metric = self.get_heartbeat_metric();
+        let message = MetricMessage::new(
+            self.hostname.clone(),
+            vec![heartbeat_metric],
+        );
+
+        self.zmq_handler.publish_metrics(&message).await?;
+        debug!("Sent standalone heartbeat for connectivity detection");
+        Ok(())
    }

    async fn handle_commands(&mut self) -> Result<()> {
@@ -226,202 +315,38 @@ impl Agent {
                info!("Processing Ping command - agent is alive");
                // Could send a response back via ZMQ if needed
            }
-            AgentCommand::ServiceControl { service_name, action } => {
-                info!("Processing ServiceControl command: {} {:?}", service_name, action);
-                if let Err(e) = self.handle_service_control(&service_name, &action).await {
-                    error!("Failed to execute service control: {}", e);
-                }
-            }
-            AgentCommand::SystemRebuild { git_url, git_branch, working_dir, api_key_file } => {
-                info!("Processing SystemRebuild command: {} @ {} -> {}", git_url, git_branch, working_dir);
-                if let Err(e) = self.handle_system_rebuild(&git_url, &git_branch, &working_dir, api_key_file.as_deref()).await {
-                    error!("Failed to execute system rebuild: {}", e);
-                }
-            }
        }
        Ok(())
    }

-    /// Handle systemd service control commands
-    async fn handle_service_control(&self, service_name: &str, action: &ServiceAction) -> Result<()> {
-        let action_str = match action {
-            ServiceAction::Start => "start",
-            ServiceAction::Stop => "stop", 
-            ServiceAction::Restart => "restart",
-            ServiceAction::Status => "status",
-        };

-        info!("Executing systemctl {} {}", action_str, service_name);
-
-        let output = tokio::process::Command::new("sudo")
-            .arg("systemctl")
-            .arg(action_str)
-            .arg(service_name)
-            .output()
-            .await?;
-
-        if output.status.success() {
-            info!("Service {} {} completed successfully", service_name, action_str);
-            if !output.stdout.is_empty() {
-                debug!("stdout: {}", String::from_utf8_lossy(&output.stdout));
-            }
-        } else {
-            let stderr = String::from_utf8_lossy(&output.stderr);
-            error!("Service {} {} failed: {}", service_name, action_str, stderr);
-            return Err(anyhow::anyhow!("systemctl {} {} failed: {}", action_str, service_name, stderr));
-        }
-
-        // Force refresh metrics after service control to update service status
-        if matches!(action, ServiceAction::Start | ServiceAction::Stop | ServiceAction::Restart) {
-            info!("Triggering metric refresh after service control");
-            // Note: We can't call self.collect_metrics_only() here due to borrowing issues
-            // The next metric collection cycle will pick up the changes
-        }
-
-        Ok(())
-    }
-
-    /// Handle NixOS system rebuild commands with git clone approach
-    async fn handle_system_rebuild(&self, git_url: &str, git_branch: &str, working_dir: &str, api_key_file: Option<&str>) -> Result<()> {
-        info!("Starting NixOS system rebuild: {} @ {} -> {}", git_url, git_branch, working_dir);
-
-        // Enable maintenance mode before rebuild
-        let maintenance_file = "/tmp/cm-maintenance";
-        if let Err(e) = tokio::fs::File::create(maintenance_file).await {
-            error!("Failed to create maintenance mode file: {}", e);
-        } else {
-            info!("Maintenance mode enabled");
-        }
-
-        // Clone or update repository
-        let git_result = self.ensure_git_repository(git_url, git_branch, working_dir, api_key_file).await;
-        
-        // Execute nixos-rebuild if git operation succeeded
-        let rebuild_result = if git_result.is_ok() {
-            info!("Git repository ready, executing nixos-rebuild");
-            tokio::process::Command::new("sudo")
-                .arg("/run/current-system/sw/bin/nixos-rebuild")
-                .arg("switch")
-                .arg("--option")
-                .arg("sandbox")
-                .arg("false")
-                .arg("--flake")
-                .arg(".")
-                .current_dir(working_dir)
-                .output()
-                .await
-        } else {
-            return git_result.and_then(|_| unreachable!());
-        };
-
-        // Always try to remove maintenance mode file
-        if let Err(e) = tokio::fs::remove_file(maintenance_file).await {
-            if e.kind() != std::io::ErrorKind::NotFound {
-                error!("Failed to remove maintenance mode file: {}", e);
-            }
-        } else {
-            info!("Maintenance mode disabled");
-        }
-
-        // Check rebuild result
-        match rebuild_result {
-            Ok(output) => {
-                if output.status.success() {
-                    info!("NixOS rebuild completed successfully");
-                    if !output.stdout.is_empty() {
-                        debug!("rebuild stdout: {}", String::from_utf8_lossy(&output.stdout));
-                    }
-                } else {
-                    let stderr = String::from_utf8_lossy(&output.stderr);
-                    error!("NixOS rebuild failed: {}", stderr);
-                    return Err(anyhow::anyhow!("nixos-rebuild failed: {}", stderr));
-                }
-            }
-            Err(e) => {
-                error!("Failed to execute nixos-rebuild: {}", e);
-                return Err(anyhow::anyhow!("Failed to execute nixos-rebuild: {}", e));
-            }
-        }
-
-        info!("System rebuild completed, triggering metric refresh");
-        Ok(())
-    }
-
-    /// Ensure git repository is cloned and up to date
-    async fn ensure_git_repository(&self, git_url: &str, git_branch: &str, working_dir: &str, api_key_file: Option<&str>) -> Result<()> {
-        use std::path::Path;
-        
-        // Read API key if provided
-        let auth_url = if let Some(key_file) = api_key_file {
-            match tokio::fs::read_to_string(key_file).await {
-                Ok(api_key) => {
-                    let api_key = api_key.trim();
-                    if !api_key.is_empty() {
-                        // Convert https://gitea.cmtec.se/cm/nixosbox.git to https://token@gitea.cmtec.se/cm/nixosbox.git
-                        if git_url.starts_with("https://") {
-                            let url_without_protocol = &git_url[8..]; // Remove "https://"
-                            format!("https://{}@{}", api_key, url_without_protocol)
-                        } else {
-                            info!("API key provided but URL is not HTTPS, using original URL");
-                            git_url.to_string()
+    /// Check metrics for user-stopped services that are now active and clear their flags
+    fn clear_user_stopped_flags_for_active_services(&mut self, metrics: &[Metric]) {
+        for metric in metrics {
+            // Look for service status metrics that are active
+            if metric.name.starts_with("service_") && metric.name.ends_with("_status") {
+                if let MetricValue::String(status) = &metric.value {
+                    if status == "active" {
+                        // Extract service name from metric name (service_nginx_status -> nginx)
+                        let service_name = metric.name
+                            .strip_prefix("service_")
+                            .and_then(|s| s.strip_suffix("_status"))
+                            .unwrap_or("");
+                        
+                        if !service_name.is_empty() && UserStoppedServiceTracker::is_service_user_stopped(service_name) {
+                            info!("Service '{}' is now active - clearing user-stopped flag", service_name);
+                            if let Err(e) = self.service_tracker.clear_user_stopped(service_name) {
+                                error!("Failed to clear user-stopped flag for '{}': {}", service_name, e);
+                            } else {
+                                // Sync to global tracker
+                                UserStoppedServiceTracker::update_global(&self.service_tracker);
+                                debug!("Cleared user-stopped flag for service '{}'", service_name);
+                            }
                        }
-                    } else {
-                        info!("API key file is empty, using original URL");
-                        git_url.to_string()
                    }
                }
-                Err(e) => {
-                    info!("Could not read API key file {}: {}, using original URL", key_file, e);
-                    git_url.to_string()
-                }
            }
-        } else {
-            git_url.to_string()
-        };
-        
-        let git_dir = Path::new(working_dir).join(".git");
-        
-        if git_dir.exists() {
-            info!("Git repository exists, updating to latest {}", git_branch);
-            
-            // Pull latest changes
-            let output = tokio::process::Command::new("git")
-                .arg("pull")
-                .arg("origin")
-                .arg(git_branch)
-                .current_dir(working_dir)
-                .output()
-                .await?;
-                
-            if !output.status.success() {
-                let stderr = String::from_utf8_lossy(&output.stderr);
-                error!("Git pull failed: {}", stderr);
-                return Err(anyhow::anyhow!("Git pull failed: {}", stderr));
-            }
-            
-            info!("Git repository updated successfully");
-        } else {
-            info!("Cloning git repository from {} (branch: {})", git_url, git_branch);
-            
-            // Clone repository with authentication if available
-            let output = tokio::process::Command::new("git")
-                .arg("clone")
-                .arg("--branch")
-                .arg(git_branch)
-                .arg(&auth_url)  // Use authenticated URL
-                .arg(working_dir)
-                .output()
-                .await?;
-                
-            if !output.status.success() {
-                let stderr = String::from_utf8_lossy(&output.stderr);
-                error!("Git clone failed: {}", stderr);
-                return Err(anyhow::anyhow!("Git clone failed: {}", stderr));
-            }
-            
-            info!("Git repository cloned successfully");
        }
-        
-        Ok(())
    }
-}
+
+}
--- a/agent/src/cache/cached_metric.rs
+++ b/agent/src/cache/cached_metric.rs
@@ -1,10 +0,0 @@
-use cm_dashboard_shared::Metric;
-use std::time::Instant;
-
-/// A cached metric with metadata
-#[derive(Debug, Clone)]
-pub struct CachedMetric {
-    pub metric: Metric,
-    pub collected_at: Instant,
-    pub access_count: u64,
-}
--- a/agent/src/cache/manager.rs
+++ b/agent/src/cache/manager.rs
@@ -1,33 +0,0 @@
-use super::ConfigurableCache;
-use cm_dashboard_shared::{CacheConfig, Metric};
-use std::sync::Arc;
-use tracing::info;
-
-/// Manages metric caching with background tasks
-pub struct MetricCacheManager {
-    cache: Arc<ConfigurableCache>,
-}
-
-impl MetricCacheManager {
-    pub fn new(config: CacheConfig) -> Self {
-        let cache = Arc::new(ConfigurableCache::new(config.clone()));
-
-        Self { cache }
-    }
-
-    /// Start background cache management tasks
-    pub async fn start_background_tasks(&self) {
-        // Temporarily disabled to isolate CPU usage issue
-        info!("Cache manager background tasks disabled for debugging");
-    }
-
-    /// Store metric in cache
-    pub async fn cache_metric(&self, metric: Metric) {
-        self.cache.store_metric(metric).await;
-    }
-
-    /// Get all cached metrics (including expired ones) for broadcasting
-    pub async fn get_all_cached_metrics(&self) -> Vec<Metric> {
-        self.cache.get_all_cached_metrics().await
-    }
-}
--- a/agent/src/cache/mod.rs
+++ b/agent/src/cache/mod.rs
@@ -1,129 +0,0 @@
-use cm_dashboard_shared::{CacheConfig, Metric};
-use std::collections::HashMap;
-use std::fs;
-use std::path::Path;
-use std::sync::Arc;
-use tokio::sync::RwLock;
-use tracing::{info, warn, error};
-
-/// Simple persistent cache for metrics
-pub struct SimpleCache {
-    metrics: RwLock<HashMap<String, Metric>>,
-    persist_path: String,
-}
-
-impl SimpleCache {
-    pub fn new(config: CacheConfig) -> Self {
-        let cache = Self {
-            metrics: RwLock::new(HashMap::new()),
-            persist_path: config.persist_path,
-        };
-        
-        // Clear cache file on startup to ensure fresh data
-        cache.clear_cache_file();
-        cache
-    }
-
-    /// Store metric in cache
-    pub async fn store_metric(&self, metric: Metric) {
-        let mut metrics = self.metrics.write().await;
-        metrics.insert(metric.name.clone(), metric);
-    }
-
-    /// Get all cached metrics
-    pub async fn get_all_cached_metrics(&self) -> Vec<Metric> {
-        let metrics = self.metrics.read().await;
-        metrics.values().cloned().collect()
-    }
-
-    /// Save cache to disk
-    pub async fn save_to_disk(&self) {
-        let metrics = self.metrics.read().await;
-        
-        // Create directory if needed
-        if let Some(parent) = Path::new(&self.persist_path).parent() {
-            if let Err(e) = fs::create_dir_all(parent) {
-                warn!("Failed to create cache directory {}: {}", parent.display(), e);
-                return;
-            }
-        }
-
-        // Serialize and save
-        match serde_json::to_string_pretty(&*metrics) {
-            Ok(json) => {
-                if let Err(e) = fs::write(&self.persist_path, json) {
-                    error!("Failed to save cache to {}: {}", self.persist_path, e);
-                }
-            }
-            Err(e) => {
-                error!("Failed to serialize cache: {}", e);
-            }
-        }
-    }
-
-    /// Load cache from disk
-    fn load_from_disk(&self) {
-        match fs::read_to_string(&self.persist_path) {
-            Ok(content) => {
-                match serde_json::from_str::<HashMap<String, Metric>>(&content) {
-                    Ok(loaded_metrics) => {
-                        if let Ok(mut metrics) = self.metrics.try_write() {
-                            *metrics = loaded_metrics;
-                            info!("Loaded {} metrics from cache", metrics.len());
-                        }
-                    }
-                    Err(e) => {
-                        warn!("Failed to parse cache file {}: {}", self.persist_path, e);
-                    }
-                }
-            }
-            Err(_) => {
-                info!("No cache file found at {}, starting fresh", self.persist_path);
-            }
-        }
-    }
-
-    /// Clear cache file on startup to ensure fresh data
-    fn clear_cache_file(&self) {
-        if Path::new(&self.persist_path).exists() {
-            match fs::remove_file(&self.persist_path) {
-                Ok(_) => info!("Cleared cache file {} on startup", self.persist_path),
-                Err(e) => warn!("Failed to clear cache file {}: {}", self.persist_path, e),
-            }
-        }
-    }
-}
-
-
-#[derive(Clone)]
-pub struct MetricCacheManager {
-    cache: Arc<SimpleCache>,
-}
-
-impl MetricCacheManager {
-    pub fn new(config: CacheConfig) -> Self {
-        Self {
-            cache: Arc::new(SimpleCache::new(config)),
-        }
-    }
-
-    pub async fn store_metric(&self, metric: Metric) {
-        self.cache.store_metric(metric).await;
-    }
-
-    pub async fn cache_metric(&self, metric: Metric) {
-        self.store_metric(metric).await;
-    }
-
-    pub async fn start_background_tasks(&self) {
-        // No background tasks needed for simple cache
-    }
-
-    pub async fn get_all_cached_metrics(&self) -> Result<Vec<Metric>, anyhow::Error> {
-        Ok(self.cache.get_all_cached_metrics().await)
-    }
-
-    pub async fn save_to_disk(&self) {
-        self.cache.save_to_disk().await;
-    }
-}
--- a/agent/src/collectors/backup.rs
+++ b/agent/src/collectors/backup.rs
@@ -107,9 +107,6 @@ impl BackupCollector {

 #[async_trait]
 impl Collector for BackupCollector {
-    fn name(&self) -> &str {
-        "backup"
-    }

    async fn collect(&self, _status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
        let backup_status_option = self.read_backup_status().await?;
@@ -139,10 +136,12 @@ impl Collector for BackupCollector {
            name: "backup_overall_status".to_string(),
            value: MetricValue::String(match overall_status {
                Status::Ok => "ok".to_string(),
+                Status::Inactive => "inactive".to_string(),
                Status::Pending => "pending".to_string(),
                Status::Warning => "warning".to_string(),
                Status::Critical => "critical".to_string(),
                Status::Unknown => "unknown".to_string(),
+                Status::Offline => "offline".to_string(),
            }),
            status: overall_status,
            timestamp,
@@ -201,10 +200,12 @@ impl Collector for BackupCollector {
                name: format!("backup_service_{}_status", service_name),
                value: MetricValue::String(match service_status {
                    Status::Ok => "ok".to_string(),
+                    Status::Inactive => "inactive".to_string(),
                    Status::Pending => "pending".to_string(),
                    Status::Warning => "warning".to_string(),
                    Status::Critical => "critical".to_string(),
                    Status::Unknown => "unknown".to_string(),
+                    Status::Offline => "offline".to_string(),
                }),
                status: service_status,
                timestamp,
--- a/agent/src/collectors/cpu.rs
+++ b/agent/src/collectors/cpu.rs
@@ -15,7 +15,6 @@ use crate::config::CpuConfig;
 /// - No process spawning
 /// - <0.1ms collection time target
 pub struct CpuCollector {
-    name: String,
    load_thresholds: HysteresisThresholds,
    temperature_thresholds: HysteresisThresholds,
 }
@@ -34,7 +33,6 @@ impl CpuCollector {
        );
        
        Self {
-            name: "cpu".to_string(),
            load_thresholds,
            temperature_thresholds,
        }
@@ -197,9 +195,6 @@ impl CpuCollector {

 #[async_trait]
 impl Collector for CpuCollector {
-    fn name(&self) -> &str {
-        &self.name
-    }

    async fn collect(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
        debug!("Collecting CPU metrics");
--- a/agent/src/collectors/disk.rs
+++ b/agent/src/collectors/disk.rs
@@ -41,11 +41,11 @@ pub struct DiskCollector {

 impl DiskCollector {
    pub fn new(config: DiskConfig) -> Self {
-        // Create hysteresis thresholds for disk temperature
+        // Create hysteresis thresholds for disk temperature from config
        let temperature_thresholds = HysteresisThresholds::with_custom_gaps(
-            60.0, // warning at 60°C
+            config.temperature_warning_celsius,
            5.0,  // 5°C gap for recovery
-            70.0, // critical at 70°C  
+            config.temperature_critical_celsius,
            5.0,  // 5°C gap for recovery
        );
        
@@ -219,18 +219,12 @@ impl DiskCollector {
    }

    /// Parse wear level from SMART output (SSD wear leveling)
+    /// Supports both NVMe and SATA SSD wear indicators
    fn parse_wear_level_from_smart(&self, smart_output: &str) -> Option<f32> {
        for line in smart_output.lines() {
-            // Look for wear leveling indicators
-            if line.contains("Wear_Leveling_Count") || line.contains("Media_Wearout_Indicator") {
-                let parts: Vec<&str> = line.split_whitespace().collect();
-                if parts.len() >= 10 {
-                    if let Ok(wear) = parts[9].parse::<f32>() {
-                        return Some(100.0 - wear); // Convert to percentage used
-                    }
-                }
-            }
-            // NVMe drives might show percentage used directly
+            let line = line.trim();
+            
+            // NVMe drives - direct percentage used
            if line.contains("Percentage Used:") {
                if let Some(wear_part) = line.split("Percentage Used:").nth(1) {
                    if let Some(wear_str) = wear_part.split('%').next() {
@@ -240,6 +234,38 @@ impl DiskCollector {
                    }
                }
            }
+            
+            // SATA SSD attributes - parse SMART table format
+            // Format: ID ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
+            let parts: Vec<&str> = line.split_whitespace().collect();
+            if parts.len() >= 10 {
+                // SSD Life Left / Percent Lifetime Remaining (higher = less wear)
+                if line.contains("SSD_Life_Left") || line.contains("Percent_Lifetime_Remain") {
+                    if let Ok(remaining) = parts[3].parse::<f32>() { // VALUE column
+                        return Some(100.0 - remaining); // Convert remaining to used
+                    }
+                }
+                
+                // Media Wearout Indicator (lower = more wear, normalize to 0-100)
+                if line.contains("Media_Wearout_Indicator") {
+                    if let Ok(remaining) = parts[3].parse::<f32>() { // VALUE column
+                        return Some(100.0 - remaining); // Convert remaining to used
+                    }
+                }
+                
+                // Wear Leveling Count (higher = less wear, but varies by manufacturer)
+                if line.contains("Wear_Leveling_Count") {
+                    if let Ok(wear_count) = parts[3].parse::<f32>() { // VALUE column
+                        // Most SSDs: 100 = new, decreases with wear
+                        if wear_count <= 100.0 {
+                            return Some(100.0 - wear_count);
+                        }
+                    }
+                }
+                
+                // Total LBAs Written - calculate against typical endurance if available
+                // This is more complex and manufacturer-specific, so we skip for now
+            }
        }
        None
    }
@@ -325,33 +351,6 @@ impl DiskCollector {
        Some(device_name.to_string())
    }

-    /// Get directory size using du command (efficient for single directory)
-    fn get_directory_size(&self, path: &str) -> Result<u64> {
-        let output = Command::new("du")
-            .arg("-s")
-            .arg("--block-size=1")
-            .arg(path)
-            .output()?;
-
-        // du returns success even with permission denied warnings in stderr
-        // We only care if the command completely failed or produced no stdout
-        let output_str = String::from_utf8(output.stdout)?;
-
-        if output_str.trim().is_empty() {
-            return Err(anyhow::anyhow!(
-                "du command produced no output for {}",
-                path
-            ));
-        }
-
-        let size_str = output_str
-            .split_whitespace()
-            .next()
-            .ok_or_else(|| anyhow::anyhow!("Failed to parse du output"))?;
-
-        let size_bytes = size_str.parse::<u64>()?;
-        Ok(size_bytes)
-    }

    /// Get filesystem info using df command
    fn get_filesystem_info(&self, path: &str) -> Result<(u64, u64)> {
@@ -382,23 +381,6 @@ impl DiskCollector {
        Ok((total_bytes, used_bytes))
    }

-    /// Calculate status based on usage percentage
-    fn calculate_usage_status(&self, used_bytes: u64, total_bytes: u64) -> Status {
-        if total_bytes == 0 {
-            return Status::Unknown;
-        }
-
-        let usage_percent = (used_bytes as f64 / total_bytes as f64) * 100.0;
-
-        // Thresholds for disk usage
-        if usage_percent >= 95.0 {
-            Status::Critical
-        } else if usage_percent >= 85.0 {
-            Status::Warning
-        } else {
-            Status::Ok
-        }
-    }

    /// Parse size string (e.g., "120G", "45M") to GB value
    fn parse_size_to_gb(&self, size_str: &str) -> f32 {
@@ -435,9 +417,6 @@ impl DiskCollector {

 #[async_trait]
 impl Collector for DiskCollector {
-    fn name(&self) -> &str {
-        "disk"
-    }

    async fn collect(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
        let start_time = Instant::now();
@@ -577,8 +556,8 @@ impl Collector for DiskCollector {

                // Drive wear level (for SSDs)
                if let Some(wear) = drive.wear_level {
-                    let wear_status = if wear >= 90.0 { Status::Critical }
-                                     else if wear >= 80.0 { Status::Warning }
+                    let wear_status = if wear >= self.config.wear_critical_percent { Status::Critical }
+                                     else if wear >= self.config.wear_warning_percent { Status::Warning }
                                     else { Status::Ok };
                    
                    metrics.push(Metric {
--- a/agent/src/collectors/memory.rs
+++ b/agent/src/collectors/memory.rs
@@ -15,7 +15,6 @@ use crate::config::MemoryConfig;
 /// - No regex or complex parsing
 /// - <0.1ms collection time target
 pub struct MemoryCollector {
-    name: String,
    usage_thresholds: HysteresisThresholds,
 }

@@ -42,7 +41,6 @@ impl MemoryCollector {
        );
        
        Self {
-            name: "memory".to_string(),
            usage_thresholds,
        }
    }
@@ -189,7 +187,7 @@ impl MemoryCollector {
        }

        // Monitor tmpfs (/tmp) usage
-        if let Ok(tmpfs_metrics) = self.get_tmpfs_metrics() {
+        if let Ok(tmpfs_metrics) = self.get_tmpfs_metrics(status_tracker) {
            metrics.extend(tmpfs_metrics);
        }

@@ -197,7 +195,7 @@ impl MemoryCollector {
    }

    /// Get tmpfs (/tmp) usage metrics  
-    fn get_tmpfs_metrics(&self) -> Result<Vec<Metric>, CollectorError> {
+    fn get_tmpfs_metrics(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
        use std::process::Command;
        
        let output = Command::new("df")
@@ -251,12 +249,15 @@ impl MemoryCollector {
        let mut metrics = Vec::new();
        let timestamp = chrono::Utc::now().timestamp() as u64;

+        // Calculate status using same thresholds as main memory
+        let tmp_status = self.calculate_usage_status("memory_tmp_usage_percent", usage_percent, status_tracker);
+        
        metrics.push(Metric {
            name: "memory_tmp_usage_percent".to_string(),
            value: MetricValue::Float(usage_percent),
            unit: Some("%".to_string()),
            description: Some("tmpfs /tmp usage percentage".to_string()),
-            status: Status::Ok,
+            status: tmp_status,
            timestamp,
        });

@@ -284,9 +285,6 @@ impl MemoryCollector {

 #[async_trait]
 impl Collector for MemoryCollector {
-    fn name(&self) -> &str {
-        &self.name
-    }

    async fn collect(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
        debug!("Collecting memory metrics");
--- a/agent/src/collectors/mod.rs
+++ b/agent/src/collectors/mod.rs
@@ -16,9 +16,6 @@ pub use error::CollectorError;
 /// Base trait for all collectors with extreme efficiency requirements
 #[async_trait]
 pub trait Collector: Send + Sync {
-    /// Name of this collector
-    fn name(&self) -> &str;
-
    /// Collect all metrics this collector provides
    async fn collect(&self, status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError>;

--- a/agent/src/collectors/nixos.rs
+++ b/agent/src/collectors/nixos.rs
@@ -10,41 +10,14 @@ use crate::config::NixOSConfig;
 /// 
 /// Collects NixOS-specific system information including:
 /// - NixOS version and build information
-/// - Currently active/logged in users
 pub struct NixOSCollector {
-    config: NixOSConfig,
 }

 impl NixOSCollector {
-    pub fn new(config: NixOSConfig) -> Self {
-        Self { config }
+    pub fn new(_config: NixOSConfig) -> Self {
+        Self {}
    }

-    /// Get NixOS build information
-    fn get_nixos_build_info(&self) -> Result<String, Box<dyn std::error::Error>> {
-        // Get nixos-version output directly
-        let output = Command::new("nixos-version").output()?;
-        
-        if !output.status.success() {
-            return Err("nixos-version command failed".into());
-        }
-
-        let version_line = String::from_utf8_lossy(&output.stdout);
-        let version = version_line.trim();
-        
-        if version.is_empty() {
-            return Err("Empty nixos-version output".into());
-        }
-        
-        // Remove codename part (e.g., "(Warbler)")
-        let clean_version = if let Some(pos) = version.find(" (") {
-            version[..pos].to_string()
-        } else {
-            version.to_string()
-        };
-        
-        Ok(clean_version)
-    }

    /// Get agent hash from binary path
    fn get_agent_hash(&self) -> Result<String, Box<dyn std::error::Error>> {
@@ -64,6 +37,22 @@ impl NixOSCollector {
    }

    /// Get configuration hash from deployed nix store system
+    /// Get git commit hash from rebuild process
+    fn get_git_commit(&self) -> Result<String, Box<dyn std::error::Error>> {
+        let commit_file = "/var/lib/cm-dashboard/git-commit";
+        match std::fs::read_to_string(commit_file) {
+            Ok(content) => {
+                let commit_hash = content.trim();
+                if commit_hash.len() >= 7 {
+                    Ok(commit_hash.to_string())
+                } else {
+                    Err("Git commit hash too short".into())
+                }
+            }
+            Err(e) => Err(format!("Failed to read git commit file: {}", e).into())
+        }
+    }
+
    fn get_config_hash(&self) -> Result<String, Box<dyn std::error::Error>> {
        // Read the symlink target of /run/current-system to get nix store path
        let output = Command::new("readlink")
@@ -91,90 +80,41 @@ impl NixOSCollector {
        Err("Could not extract hash from nix store path".into())
    }

-    /// Get currently active users
-    fn get_active_users(&self) -> Result<Vec<String>, Box<dyn std::error::Error>> {
-        let output = Command::new("who").output()?;
-        
-        if !output.status.success() {
-            return Err("who command failed".into());
-        }
-
-        let who_output = String::from_utf8_lossy(&output.stdout);
-        let mut users = std::collections::HashSet::new();
-
-        for line in who_output.lines() {
-            if let Some(username) = line.split_whitespace().next() {
-                if !username.is_empty() {
-                    users.insert(username.to_string());
-                }
-            }
-        }
-
-        Ok(users.into_iter().collect())
-    }
 }

 #[async_trait]
 impl Collector for NixOSCollector {
-    fn name(&self) -> &str {
-        "nixos"
-    }

    async fn collect(&self, _status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
        debug!("Collecting NixOS system information");
        let mut metrics = Vec::new();
        let timestamp = chrono::Utc::now().timestamp() as u64;

-        // Collect NixOS build information
-        match self.get_nixos_build_info() {
-            Ok(build_info) => {
+        // Collect git commit information (shows what's actually deployed)
+        match self.get_git_commit() {
+            Ok(git_commit) => {
                metrics.push(Metric {
                    name: "system_nixos_build".to_string(),
-                    value: MetricValue::String(build_info),
+                    value: MetricValue::String(git_commit),
                    unit: None,
-                    description: Some("NixOS build information".to_string()),
+                    description: Some("Git commit hash of deployed configuration".to_string()),
                    status: Status::Ok,
                    timestamp,
                });
            }
            Err(e) => {
-                debug!("Failed to get NixOS build info: {}", e);
+                debug!("Failed to get git commit: {}", e);
                metrics.push(Metric {
                    name: "system_nixos_build".to_string(),
                    value: MetricValue::String("unknown".to_string()),
                    unit: None,
-                    description: Some("NixOS build (failed to detect)".to_string()),
+                    description: Some("Git commit hash (failed to detect)".to_string()),
                    status: Status::Unknown,
                    timestamp,
                });
            }
        }

-        // Collect active users
-        match self.get_active_users() {
-            Ok(users) => {
-                let users_str = users.join(", ");
-                metrics.push(Metric {
-                    name: "system_active_users".to_string(),
-                    value: MetricValue::String(users_str),
-                    unit: None,
-                    description: Some("Currently active users".to_string()),
-                    status: Status::Ok,
-                    timestamp,
-                });
-            }
-            Err(e) => {
-                debug!("Failed to get active users: {}", e);
-                metrics.push(Metric {
-                    name: "system_active_users".to_string(),
-                    value: MetricValue::String("unknown".to_string()),
-                    unit: None,
-                    description: Some("Active users (failed to detect)".to_string()),
-                    status: Status::Unknown,
-                    timestamp,
-                });
-            }
-        }

        // Collect config hash
        match self.get_config_hash() {
--- a/agent/src/collectors/systemd.rs
+++ b/agent/src/collectors/systemd.rs
@@ -8,6 +8,7 @@ use tracing::debug;

 use super::{Collector, CollectorError};
 use crate::config::SystemdConfig;
+use crate::service_tracker::UserStoppedServiceTracker;

 /// Systemd collector for monitoring systemd services
 pub struct SystemdCollector {
@@ -32,7 +33,7 @@ struct ServiceCacheState {
    nginx_site_metrics: Vec<Metric>,
    /// Last time nginx sites were checked
    last_nginx_check_time: Option<Instant>,
-    /// How often to check nginx site latency (30 seconds)
+    /// How often to check nginx site latency (configurable)
    nginx_check_interval_seconds: u64,
 }

@@ -42,7 +43,6 @@ struct ServiceStatusInfo {
    load_state: String,
    active_state: String,
    sub_state: String,
-    description: String,
 }

 impl SystemdCollector {
@@ -55,7 +55,7 @@ impl SystemdCollector {
                discovery_interval_seconds: config.interval_seconds,
                nginx_site_metrics: Vec::new(),
                last_nginx_check_time: None,
-                nginx_check_interval_seconds: 30, // 30 seconds for nginx sites
+                nginx_check_interval_seconds: config.nginx_check_interval_seconds,
            }),
            config,
        }
@@ -137,8 +137,21 @@ impl SystemdCollector {
    /// Auto-discover interesting services to monitor (internal version that doesn't update state)
    fn discover_services_internal(&self) -> Result<(Vec<String>, std::collections::HashMap<String, ServiceStatusInfo>)> {
        debug!("Starting systemd service discovery with status caching");
-        // Get all services (includes inactive, running, failed - everything)
-        let units_output = Command::new("systemctl")
+        
+        // First: Get all service unit files (includes services that have never been started)
+        let unit_files_output = Command::new("systemctl")
+            .arg("list-unit-files")
+            .arg("--type=service")
+            .arg("--no-pager")
+            .arg("--plain")
+            .output()?;
+
+        if !unit_files_output.status.success() {
+            return Err(anyhow::anyhow!("systemctl list-unit-files command failed"));
+        }
+
+        // Second: Get runtime status of all units
+        let units_status_output = Command::new("systemctl")
            .arg("list-units")
            .arg("--type=service")
            .arg("--all")
@@ -146,22 +159,33 @@ impl SystemdCollector {
            .arg("--plain")
            .output()?;

-        if !units_output.status.success() {
-            return Err(anyhow::anyhow!("systemctl system command failed"));
+        if !units_status_output.status.success() {
+            return Err(anyhow::anyhow!("systemctl list-units command failed"));
        }

-        let units_str = String::from_utf8(units_output.stdout)?;
+        let unit_files_str = String::from_utf8(unit_files_output.stdout)?;
+        let units_status_str = String::from_utf8(units_status_output.stdout)?;
        let mut services = Vec::new();

        // Use configuration instead of hardcoded values
        let excluded_services = &self.config.excluded_services;
        let service_name_filters = &self.config.service_name_filters;

-        // Parse all services and cache their status information
+        // Parse all service unit files to get complete service list
        let mut all_service_names = std::collections::HashSet::new();
-        let mut status_cache = std::collections::HashMap::new();
        
-        for line in units_str.lines() {
+        for line in unit_files_str.lines() {
+            let fields: Vec<&str> = line.split_whitespace().collect();
+            if fields.len() >= 2 && fields[0].ends_with(".service") {
+                let service_name = fields[0].trim_end_matches(".service");
+                all_service_names.insert(service_name.to_string());
+                debug!("Found service unit file: {}", service_name);
+            }
+        }
+
+        // Parse runtime status for all units
+        let mut status_cache = std::collections::HashMap::new();
+        for line in units_status_str.lines() {
            let fields: Vec<&str> = line.split_whitespace().collect();
            if fields.len() >= 4 && fields[0].ends_with(".service") {
                let service_name = fields[0].trim_end_matches(".service");
@@ -170,22 +194,27 @@ impl SystemdCollector {
                let load_state = fields.get(1).unwrap_or(&"unknown").to_string();
                let active_state = fields.get(2).unwrap_or(&"unknown").to_string();
                let sub_state = fields.get(3).unwrap_or(&"unknown").to_string();
-                let description = if fields.len() > 4 {
-                    fields[4..].join(" ")
-                } else {
-                    "".to_string()
-                };

                // Cache the status information
                status_cache.insert(service_name.to_string(), ServiceStatusInfo {
                    load_state: load_state.clone(),
                    active_state: active_state.clone(),
                    sub_state: sub_state.clone(),
-                    description,
                });

-                all_service_names.insert(service_name.to_string());
-                debug!("Parsed service: {} (load:{}, active:{}, sub:{})", service_name, load_state, active_state, sub_state);
+                debug!("Got runtime status for service: {} (load:{}, active:{}, sub:{})", service_name, load_state, active_state, sub_state);
+            }
+        }
+
+        // For services found in unit files but not in runtime status, set default inactive status
+        for service_name in &all_service_names {
+            if !status_cache.contains_key(service_name) {
+                status_cache.insert(service_name.to_string(), ServiceStatusInfo {
+                    load_state: "not-loaded".to_string(),
+                    active_state: "inactive".to_string(),
+                    sub_state: "dead".to_string(),
+                });
+                debug!("Service {} found in unit files but not runtime - marked as inactive", service_name);
            }
        }

@@ -325,13 +354,38 @@ impl SystemdCollector {
        Ok((active_status, detailed_info))
    }

-    /// Calculate service status
-    fn calculate_service_status(&self, active_status: &str) -> Status {
+    /// Map a service's active status string to a `Status`, taking user-stopped services into account.
+    ///
+    /// Inactive or transitioning services flagged as user-stopped are reported as `Status::Ok`.
+    fn calculate_service_status(&self, service_name: &str, active_status: &str) -> Status {
        match active_status.to_lowercase().as_str() {
-            "active" => Status::Ok,
-            "inactive" | "dead" => Status::Warning,
+            "active" => {
+                // Read-only context: a stale user-stopped flag can only be logged here, not cleared
+                if UserStoppedServiceTracker::is_service_user_stopped(service_name) {
+                    debug!("Service '{}' is now active but still flagged as user-stopped", service_name);
+                }
+                Status::Ok
+            },
+            "inactive" | "dead" => {
+                // Check if this service was stopped by user action
+                if UserStoppedServiceTracker::is_service_user_stopped(service_name) {
+                    debug!("Service '{}' is inactive but marked as user-stopped - treating as OK", service_name);
+                    Status::Ok
+                } else {
+                    debug!("Service '{}' is inactive - treating as Inactive status", service_name);
+                    Status::Inactive
+                }
+            },
            "failed" | "error" => Status::Critical,
-            "activating" | "deactivating" | "reloading" | "start" | "stop" | "restart" => Status::Pending,
+            "activating" | "deactivating" | "reloading" | "start" | "stop" | "restart" => {
+                // For user-stopped services that are transitioning, keep them as OK during transition
+                if UserStoppedServiceTracker::is_service_user_stopped(service_name) {
+                    debug!("Service '{}' is transitioning but was user-stopped - treating as OK", service_name);
+                    Status::Ok
+                } else {
+                    Status::Pending
+                }
+            },
            _ => Status::Unknown,
        }
    }
@@ -432,9 +486,6 @@ impl SystemdCollector {

 #[async_trait]
 impl Collector for SystemdCollector {
-    fn name(&self) -> &str {
-        "systemd"
-    }

    async fn collect(&self, _status_tracker: &mut StatusTracker) -> Result<Vec<Metric>, CollectorError> {
        let start_time = Instant::now();
@@ -455,7 +506,7 @@ impl Collector for SystemdCollector {
        for service in &monitored_services {
            match self.get_service_status(service) {
                Ok((active_status, _detailed_info)) => {
-                    let status = self.calculate_service_status(&active_status);
+                    let status = self.calculate_service_status(service, &active_status);

                    // Individual service status metric
                    metrics.push(Metric {
@@ -530,10 +581,8 @@ impl SystemdCollector {
        for (site_name, url) in &sites {
            match self.check_site_latency(url) {
                Ok(latency_ms) => {
-                    let status = if latency_ms < 500.0 {
+                    let status = if latency_ms < self.config.nginx_latency_critical_ms {
                        Status::Ok
-                    } else if latency_ms < 2000.0 {
-                        Status::Warning
                    } else {
                        Status::Critical
                    };
@@ -625,10 +674,10 @@ impl SystemdCollector {

        let start = Instant::now();

-        // Create HTTP client with timeouts (similar to legacy implementation)
+        // Create HTTP client with timeouts from configuration
        let client = reqwest::blocking::Client::builder()
-            .timeout(Duration::from_secs(10))
-            .connect_timeout(Duration::from_secs(10))
+            .timeout(Duration::from_secs(self.config.http_timeout_seconds))
+            .connect_timeout(Duration::from_secs(self.config.http_connect_timeout_seconds))
            .redirect(reqwest::redirect::Policy::limited(10))
            .build()?;

--- a/agent/src/communication/mod.rs
+++ b/agent/src/communication/mod.rs
@@ -65,7 +65,6 @@ impl ZmqHandler {
        Ok(())
    }

-    /// Send heartbeat (placeholder for future use)

    /// Try to receive a command (non-blocking)
    pub fn try_receive_command(&self) -> Result<Option<AgentCommand>> {
@@ -99,25 +98,4 @@ pub enum AgentCommand {
    ToggleCollector { name: String, enabled: bool },
    /// Request status/health check
    Ping,
-    /// Control systemd service
-    ServiceControl {
-        service_name: String,
-        action: ServiceAction,
-    },
-    /// Rebuild NixOS system
-    SystemRebuild {
-        git_url: String,
-        git_branch: String,
-        working_dir: String,
-        api_key_file: Option<String>,
-    },
-}
-
-/// Service control actions
-#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
-pub enum ServiceAction {
-    Start,
-    Stop,
-    Restart,
-    Status,
 }
--- a/agent/src/config/mod.rs
+++ b/agent/src/config/mod.rs
@@ -25,8 +25,10 @@ pub struct ZmqConfig {
    pub publisher_port: u16,
    pub command_port: u16,
    pub bind_address: String,
-    pub timeout_ms: u64,
-    pub heartbeat_interval_ms: u64,
+    pub transmission_interval_seconds: u64,
+    /// Heartbeat transmission interval in seconds for host connectivity detection
+    #[serde(default = "default_heartbeat_interval_seconds")]
+    pub heartbeat_interval_seconds: u64,
 }

 /// Collector configuration
@@ -36,7 +38,6 @@ pub struct CollectorConfig {
    pub memory: MemoryConfig,
    pub disk: DiskConfig,
    pub systemd: SystemdConfig,
-    pub smart: SmartConfig,
    pub backup: BackupConfig,
    pub network: NetworkConfig,
    pub nixos: NixOSConfig,
@@ -75,6 +76,11 @@ pub struct DiskConfig {
    pub usage_critical_percent: f32,
    /// Filesystem configurations
    pub filesystems: Vec<FilesystemConfig>,
+    /// SMART monitoring thresholds
+    pub temperature_warning_celsius: f32,
+    pub temperature_critical_celsius: f32,
+    pub wear_warning_percent: f32,
+    pub wear_critical_percent: f32,
 }

 /// Filesystem configuration entry
@@ -100,18 +106,12 @@ pub struct SystemdConfig {
    pub memory_critical_mb: f32,
    pub service_directories: std::collections::HashMap<String, Vec<String>>,
    pub host_user_mapping: String,
+    pub nginx_check_interval_seconds: u64,
+    pub http_timeout_seconds: u64,
+    pub http_connect_timeout_seconds: u64,
+    pub nginx_latency_critical_ms: f32,
 }

-/// SMART collector configuration
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct SmartConfig {
-    pub enabled: bool,
-    pub interval_seconds: u64,
-    pub temperature_warning_celsius: f32,
-    pub temperature_critical_celsius: f32,
-    pub wear_warning_percent: f32,
-    pub wear_critical_percent: f32,
-}

 /// NixOS collector configuration
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -145,6 +145,23 @@ pub struct NotificationConfig {
    pub from_email: String,
    pub to_email: String,
    pub rate_limit_minutes: u64,
+    /// Email notification batching interval in seconds (default: 60)
+    pub aggregation_interval_seconds: u64,
+    /// List of metric names to exclude from email notifications
+    #[serde(default)]
+    pub exclude_email_metrics: Vec<String>,
+    /// Path to maintenance mode file that suppresses email notifications when present
+    #[serde(default = "default_maintenance_mode_file")]
+    pub maintenance_mode_file: String,
+}
+
+/// Serde default for `ZmqConfig::heartbeat_interval_seconds` (5 seconds).
+fn default_heartbeat_interval_seconds() -> u64 {
+    5
+}
+
+fn default_maintenance_mode_file() -> String {
+    "/tmp/cm-maintenance".to_string()
 }

 impl AgentConfig {
--- a/agent/src/config/validation.rs
+++ b/agent/src/config/validation.rs
@@ -19,10 +19,6 @@ pub fn validate_config(config: &AgentConfig) -> Result<()> {
        bail!("ZMQ bind address cannot be empty");
    }

-    if config.zmq.timeout_ms == 0 {
-        bail!("ZMQ timeout cannot be 0");
-    }
-
    // Validate collection interval
    if config.collection_interval_seconds == 0 {
        bail!("Collection interval cannot be 0");
@@ -83,6 +79,13 @@ pub fn validate_config(config: &AgentConfig) -> Result<()> {
        }
    }

+    // Validate systemd configuration
+    if config.collectors.systemd.enabled {
+        if config.collectors.systemd.nginx_latency_critical_ms <= 0.0 {
+            bail!("Nginx latency critical threshold must be positive");
+        }
+    }
+
    // Validate SMTP configuration
    if config.notifications.enabled {
        if config.notifications.smtp_host.is_empty() {
--- a/agent/src/main.rs
+++ b/agent/src/main.rs
@@ -4,20 +4,36 @@ use tracing::{error, info};
 use tracing_subscriber::EnvFilter;

 mod agent;
-mod cache;
 mod collectors;
 mod communication;
 mod config;
 mod metrics;
 mod notifications;
+mod service_tracker;
 mod status;

 use agent::Agent;

+/// Get version showing cm-dashboard-agent package hash for easy deployment verification.
+/// Falls back to the Cargo package version instead of panicking outside the Nix store.
+fn get_version() -> &'static str {
+    // Extract Nix store hash from path like /nix/store/HASH-cm-dashboard-v0.1.8/bin/cm-dashboard-agent
+    let short_hash = std::env::current_exe().ok().and_then(|exe_path| {
+        let exe_str = exe_path.to_string_lossy();
+        let hash = exe_str.strip_prefix("/nix/store/")?.split('-').next()?;
+        // First 8 characters of the store hash; `get` yields None when it is shorter
+        hash.get(..8).map(str::to_owned)
+    });
+    match short_hash {
+        Some(hash) => Box::leak(hash.into_boxed_str()), // leaked once for the `'static` return
+        None => env!("CARGO_PKG_VERSION"),
+    }
+}
+
 #[derive(Parser)]
 #[command(name = "cm-dashboard-agent")]
 #[command(about = "CM Dashboard metrics agent with individual metric collection")]
-#[command(version)]
+#[command(version = get_version())]
 struct Cli {
    /// Increase logging verbosity (-v, -vv)
    #[arg(short, long, action = clap::ArgAction::Count)]
--- a/agent/src/metrics/mod.rs
+++ b/agent/src/metrics/mod.rs
@@ -1,27 +1,32 @@
 use anyhow::Result;
 use cm_dashboard_shared::{Metric, StatusTracker};
-use std::collections::HashMap;
-use std::time::Instant;
+use std::time::{Duration, Instant};
 use tracing::{debug, error, info};

-use crate::cache::MetricCacheManager;
 use crate::collectors::{
    backup::BackupCollector, cpu::CpuCollector, disk::DiskCollector, memory::MemoryCollector,
    nixos::NixOSCollector, systemd::SystemdCollector, Collector,
 };
 use crate::config::{AgentConfig, CollectorConfig};

-/// Manages all metric collectors with intelligent caching
+/// Collector paired with its scheduling state (interval, last run) and a display name
+struct TimedCollector {
+    collector: Box<dyn Collector>,
+    interval: Duration, // minimum time between two collections
+    last_collection: Option<Instant>, // `None` until the first successful collection
+    name: String, // label used in log messages
+}
+
+/// Manages all metric collectors with individual intervals
 pub struct MetricCollectionManager {
-    collectors: Vec<Box<dyn Collector>>,
-    cache_manager: MetricCacheManager,
-    last_collection_times: HashMap<String, Instant>,
+    collectors: Vec<TimedCollector>,
    status_tracker: StatusTracker,
+    cached_metrics: Vec<Metric>,
 }

 impl MetricCollectionManager {
-    pub async fn new(config: &CollectorConfig, agent_config: &AgentConfig) -> Result<Self> {
-        let mut collectors: Vec<Box<dyn Collector>> = Vec::new();
+    pub async fn new(config: &CollectorConfig, _agent_config: &AgentConfig) -> Result<Self> {
+        let mut collectors: Vec<TimedCollector> = Vec::new();

        // Benchmark mode - only enable specific collector based on env var
        let benchmark_mode = std::env::var("BENCHMARK_COLLECTOR").ok();
@@ -31,7 +36,12 @@ impl MetricCollectionManager {
                // CPU collector only
                if config.cpu.enabled {
                    let cpu_collector = CpuCollector::new(config.cpu.clone());
-                    collectors.push(Box::new(cpu_collector));
+                    collectors.push(TimedCollector {
+                        collector: Box::new(cpu_collector),
+                        interval: Duration::from_secs(config.cpu.interval_seconds),
+                        last_collection: None,
+                        name: "CPU".to_string(),
+                    });
                    info!("BENCHMARK: CPU collector only");
                }
            }
@@ -39,20 +49,35 @@ impl MetricCollectionManager {
                // Memory collector only
                if config.memory.enabled {
                    let memory_collector = MemoryCollector::new(config.memory.clone());
-                    collectors.push(Box::new(memory_collector));
+                    collectors.push(TimedCollector {
+                        collector: Box::new(memory_collector),
+                        interval: Duration::from_secs(config.memory.interval_seconds),
+                        last_collection: None,
+                        name: "Memory".to_string(),
+                    });
                    info!("BENCHMARK: Memory collector only");
                }
            }
            Some("disk") => {
                // Disk collector only
                let disk_collector = DiskCollector::new(config.disk.clone());
-                collectors.push(Box::new(disk_collector));
+                collectors.push(TimedCollector {
+                    collector: Box::new(disk_collector),
+                    interval: Duration::from_secs(config.disk.interval_seconds),
+                    last_collection: None,
+                    name: "Disk".to_string(),
+                });
                info!("BENCHMARK: Disk collector only");
            }
            Some("systemd") => {
                // Systemd collector only
                let systemd_collector = SystemdCollector::new(config.systemd.clone());
-                collectors.push(Box::new(systemd_collector));
+                collectors.push(TimedCollector {
+                    collector: Box::new(systemd_collector),
+                    interval: Duration::from_secs(config.systemd.interval_seconds),
+                    last_collection: None,
+                    name: "Systemd".to_string(),
+                });
                info!("BENCHMARK: Systemd collector only");
            }
            Some("backup") => {
@@ -62,7 +87,12 @@ impl MetricCollectionManager {
                        config.backup.backup_paths.first().cloned(),
                        config.backup.max_age_hours,
                    );
-                    collectors.push(Box::new(backup_collector));
+                    collectors.push(TimedCollector {
+                        collector: Box::new(backup_collector),
+                        interval: Duration::from_secs(config.backup.interval_seconds),
+                        last_collection: None,
+                        name: "Backup".to_string(),
+                    });
                    info!("BENCHMARK: Backup collector only");
                }
            }
@@ -74,57 +104,81 @@ impl MetricCollectionManager {
                // Normal mode - all collectors
                if config.cpu.enabled {
                    let cpu_collector = CpuCollector::new(config.cpu.clone());
-                    collectors.push(Box::new(cpu_collector));
-                    info!("CPU collector initialized");
+                    collectors.push(TimedCollector {
+                        collector: Box::new(cpu_collector),
+                        interval: Duration::from_secs(config.cpu.interval_seconds),
+                        last_collection: None,
+                        name: "CPU".to_string(),
+                    });
+                    info!("CPU collector initialized with {}s interval", config.cpu.interval_seconds);
                }

                if config.memory.enabled {
                    let memory_collector = MemoryCollector::new(config.memory.clone());
-                    collectors.push(Box::new(memory_collector));
-                    info!("Memory collector initialized");
+                    collectors.push(TimedCollector {
+                        collector: Box::new(memory_collector),
+                        interval: Duration::from_secs(config.memory.interval_seconds),
+                        last_collection: None,
+                        name: "Memory".to_string(),
+                    });
+                    info!("Memory collector initialized with {}s interval", config.memory.interval_seconds);
                }

                let disk_collector = DiskCollector::new(config.disk.clone());
-                collectors.push(Box::new(disk_collector));
-                info!("Disk collector initialized");
+                collectors.push(TimedCollector {
+                    collector: Box::new(disk_collector),
+                    interval: Duration::from_secs(config.disk.interval_seconds),
+                    last_collection: None,
+                    name: "Disk".to_string(),
+                });
+                info!("Disk collector initialized with {}s interval", config.disk.interval_seconds);

                let systemd_collector = SystemdCollector::new(config.systemd.clone());
-                collectors.push(Box::new(systemd_collector));
-                info!("Systemd collector initialized");
+                collectors.push(TimedCollector {
+                    collector: Box::new(systemd_collector),
+                    interval: Duration::from_secs(config.systemd.interval_seconds),
+                    last_collection: None,
+                    name: "Systemd".to_string(),
+                });
+                info!("Systemd collector initialized with {}s interval", config.systemd.interval_seconds);

                if config.backup.enabled {
                    let backup_collector = BackupCollector::new(
                        config.backup.backup_paths.first().cloned(),
                        config.backup.max_age_hours,
                    );
-                    collectors.push(Box::new(backup_collector));
-                    info!("Backup collector initialized");
+                    collectors.push(TimedCollector {
+                        collector: Box::new(backup_collector),
+                        interval: Duration::from_secs(config.backup.interval_seconds),
+                        last_collection: None,
+                        name: "Backup".to_string(),
+                    });
+                    info!("Backup collector initialized with {}s interval", config.backup.interval_seconds);
                }

                if config.nixos.enabled {
                    let nixos_collector = NixOSCollector::new(config.nixos.clone());
-                    collectors.push(Box::new(nixos_collector));
-                    info!("NixOS collector initialized");
+                    collectors.push(TimedCollector {
+                        collector: Box::new(nixos_collector),
+                        interval: Duration::from_secs(config.nixos.interval_seconds),
+                        last_collection: None,
+                        name: "NixOS".to_string(),
+                    });
+                    info!("NixOS collector initialized with {}s interval", config.nixos.interval_seconds);
                }
+
            }
        }

-        // Initialize cache manager with configuration
-        let cache_manager = MetricCacheManager::new(agent_config.cache.clone());
-
-        // Start background cache tasks
-        cache_manager.start_background_tasks().await;
-
        info!(
-            "Metric collection manager initialized with {} collectors and caching enabled",
+            "Metric collection manager initialized with {} collectors",
            collectors.len()
        );

        Ok(Self {
            collectors,
-            cache_manager,
-            last_collection_times: HashMap::new(),
            status_tracker: StatusTracker::new(),
+            cached_metrics: Vec::new(),
        })
    }

@@ -133,129 +187,78 @@ impl MetricCollectionManager {
        let mut all_metrics = Vec::new();
        let now = Instant::now();

-        info!(
-            "Force collecting from ALL {} collectors for startup",
-            self.collectors.len()
-        );
-
-        // Force collection from every collector regardless of intervals
-        for collector in &self.collectors {
-            let collector_name = collector.name();
-
-            match collector.collect(&mut self.status_tracker).await {
+        for timed_collector in &mut self.collectors {
+            match timed_collector.collector.collect(&mut self.status_tracker).await {
                Ok(metrics) => {
-                    info!(
-                        "Force collected {} metrics from {} collector",
-                        metrics.len(),
-                        collector_name
-                    );
-
-                    // Cache all new metrics
-                    for metric in &metrics {
-                        self.cache_manager.cache_metric(metric.clone()).await;
-                    }
-
+                    let metric_count = metrics.len();
                    all_metrics.extend(metrics);
-                    self.last_collection_times
-                        .insert(collector_name.to_string(), now);
+                    timed_collector.last_collection = Some(now);
+                    debug!("Force collected {} metrics from {}", metric_count, timed_collector.name);
                }
                Err(e) => {
-                    error!(
-                        "Collector '{}' failed during force collection: {}",
-                        collector_name, e
-                    );
-                    // Continue with other collectors even if one fails
+                    error!("Collector {} failed: {}", timed_collector.name, e);
                }
            }
        }
-
-        info!(
-            "Force collection completed: {} total metrics cached",
-            all_metrics.len()
-        );
+        
+        // Cache the collected metrics
+        self.cached_metrics = all_metrics.clone();
        Ok(all_metrics)
    }

-    /// Collect metrics from all collectors with intelligent caching
-    pub async fn collect_all_metrics(&mut self) -> Result<Vec<Metric>> {
+    /// Collect metrics from collectors whose intervals have elapsed
+    pub async fn collect_metrics_timed(&mut self) -> Result<Vec<Metric>> {
        let mut all_metrics = Vec::new();
        let now = Instant::now();

-        // Collecting metrics from collectors (debug logging disabled for performance)
-
-        // Keep track of which collector types we're collecting fresh data from
-        let mut collecting_fresh = std::collections::HashSet::new();
-
-        // For each collector, check if we need to collect based on time intervals
-        for collector in &self.collectors {
-            let collector_name = collector.name();
-
-            // Determine cache interval for this collector type based on data volatility
-            let cache_interval_secs = match collector_name {
-                "cpu" | "memory" => 5,    // Fast updates for volatile metrics
-                "systemd" => 30,          // Service status changes less frequently
-                "disk" => 300,            // SMART data changes very slowly (5 minutes)
-                "backup" => 600,          // Backup status changes rarely (10 minutes)
-                _ => 30,                  // Default: moderate frequency
+        for timed_collector in &mut self.collectors {
+            let should_collect = match timed_collector.last_collection {
+                None => true, // First collection
+                Some(last_time) => now.duration_since(last_time) >= timed_collector.interval,
            };

-            let should_collect =
-                if let Some(last_time) = self.last_collection_times.get(collector_name) {
-                    now.duration_since(*last_time).as_secs() >= cache_interval_secs
-                } else {
-                    true // First collection
-                };
-
            if should_collect {
-                collecting_fresh.insert(collector_name.to_string());
-                match collector.collect(&mut self.status_tracker).await {
+                match timed_collector.collector.collect(&mut self.status_tracker).await {
                    Ok(metrics) => {
-                        // Collector returned fresh metrics (debug logging disabled for performance)
-
-                        // Cache all new metrics
-                        for metric in &metrics {
-                            self.cache_manager.cache_metric(metric.clone()).await;
-                        }
-
+                        let metric_count = metrics.len();
                        all_metrics.extend(metrics);
-                        self.last_collection_times
-                            .insert(collector_name.to_string(), now);
+                        timed_collector.last_collection = Some(now);
+                        debug!(
+                            "Collected {} metrics from {} ({}s interval)",
+                            metric_count,
+                            timed_collector.name,
+                            timed_collector.interval.as_secs()
+                        );
                    }
                    Err(e) => {
-                        error!("Collector '{}' failed: {}", collector_name, e);
-                        // Continue with other collectors even if one fails
+                        error!("Collector {} failed: {}", timed_collector.name, e);
                    }
                }
-            } else {
-                let _elapsed = self
-                    .last_collection_times
-                    .get(collector_name)
-                    .map(|t| now.duration_since(*t).as_secs())
-                    .unwrap_or(0);
-                // Collector skipped (debug logging disabled for performance)
            }
        }
-
-        // For 2-second intervals, skip cached metrics to avoid duplicates
-        // (Cache system disabled for realtime updates)
-
-        // Collected metrics total (debug logging disabled for performance)
+        
+        // Update cache with newly collected metrics
+        if !all_metrics.is_empty() {
+            // Merge new metrics with cached metrics (replace by name)
+            for new_metric in &all_metrics {
+                // Remove any existing metric with the same name
+                self.cached_metrics.retain(|cached| cached.name != new_metric.name);
+                // Add the new metric
+                self.cached_metrics.push(new_metric.clone());
+            }
+        }
+        
        Ok(all_metrics)
    }

-
-    /// Get all cached metrics from the cache manager
-    pub async fn get_all_cached_metrics(&self) -> Result<Vec<Metric>> {
-        let cached_metrics = self.cache_manager.get_all_cached_metrics().await?;
-        debug!(
-            "Retrieved {} cached metrics for broadcast",
-            cached_metrics.len()
-        );
-        Ok(cached_metrics)
+    /// Legacy alias for `collect_metrics_timed`: only collectors whose interval has elapsed are polled
+    pub async fn collect_all_metrics(&mut self) -> Result<Vec<Metric>> {
+        self.collect_metrics_timed().await
    }
-
-    pub fn get_cache_manager(&self) -> &MetricCacheManager {
-        &self.cache_manager
+    
+    /// Return a clone of the cached metrics without triggering a fresh collection
+    pub fn get_cached_metrics(&self) -> Vec<Metric> {
+        self.cached_metrics.clone()
    }

 }
--- a/agent/src/notifications/mod.rs
+++ b/agent/src/notifications/mod.rs
@@ -59,6 +59,6 @@ impl NotificationManager {
    }

    fn is_maintenance_mode(&self) -> bool {
-        std::fs::metadata("/tmp/cm-maintenance").is_ok()
+        std::path::Path::new(&self.config.maintenance_mode_file).exists()
    }
 }
--- a/agent/src/service_tracker.rs
+++ b/agent/src/service_tracker.rs
@@ -0,0 +1,164 @@
+use anyhow::Result;
+use serde::{Deserialize, Serialize};
+use std::collections::HashSet;
+use std::fs;
+use std::path::Path;
+use std::sync::{Arc, Mutex, OnceLock};
+use tracing::{debug, info, warn};
+
+/// Process-wide tracker instance; set once by `init_global` and read by collectors
+static GLOBAL_TRACKER: OnceLock<Arc<Mutex<UserStoppedServiceTracker>>> = OnceLock::new();
+
+/// Tracks services that have been stopped by user action
+/// These services are reported as OK instead of Inactive while they are stopped
+#[derive(Debug)]
+pub struct UserStoppedServiceTracker {
+    /// Names of services stopped by user action
+    user_stopped_services: HashSet<String>,
+    /// Path of the JSON file the set is persisted to
+    storage_path: String,
+}
+
+/// JSON document written by `save_to_storage` and read by `load_from_storage`
+#[derive(Debug, Serialize, Deserialize)]
+struct UserStoppedData {
+    services: Vec<String>, // names of the user-stopped services
+}
+
+impl UserStoppedServiceTracker {
+    /// Create new tracker with the default storage path, loading any persisted state
+    pub fn new() -> Self {
+        Self::with_storage_path("/var/lib/cm-dashboard/user-stopped-services.json")
+    }
+
+    /// Initialize global instance (called by agent); persisted state is loaded only once
+    pub fn init_global() -> Result<Self> {
+        let tracker = Self::new();
+        // Snapshot for the global instance so storage is not read (and logged) twice
+        let global_instance = Arc::new(Mutex::new(Self {
+            user_stopped_services: tracker.user_stopped_services.clone(),
+            storage_path: tracker.storage_path.clone(),
+        }));
+        if GLOBAL_TRACKER.set(global_instance).is_err() {
+            warn!("Global service tracker was already initialized");
+        }
+        Ok(tracker)
+    }
+
+    /// Check if a service is user-stopped (global access for collectors); `false` if unavailable
+    pub fn is_service_user_stopped(service_name: &str) -> bool {
+        let Some(global) = GLOBAL_TRACKER.get() else {
+            debug!("Global service tracker not initialized");
+            return false;
+        };
+        match global.lock() {
+            Ok(tracker) => tracker.is_user_stopped(service_name),
+            Err(_) => {
+                debug!("Failed to lock global service tracker");
+                false
+            }
+        }
+    }
+
+    /// Copy the given tracker's service set into the global tracker (called by agent on changes)
+    pub fn update_global(updated_tracker: &UserStoppedServiceTracker) {
+        let Some(global) = GLOBAL_TRACKER.get() else {
+            debug!("Global service tracker not initialized for update");
+            return;
+        };
+        if let Ok(mut tracker) = global.lock() {
+            tracker.user_stopped_services = updated_tracker.user_stopped_services.clone();
+        } else {
+            debug!("Failed to lock global service tracker for update");
+        }
+    }
+
+    /// Create a tracker persisted at a custom storage path, loading any saved state.
+    /// A load failure is only logged; the tracker then starts with an empty set.
+    pub fn with_storage_path<P: AsRef<Path>>(storage_path: P) -> Self {
+        let mut tracker = Self {
+            user_stopped_services: HashSet::new(),
+            storage_path: storage_path.as_ref().to_string_lossy().into_owned(),
+        };
+
+        // Best effort: an unreadable or corrupt file must not prevent startup
+        if let Err(load_error) = tracker.load_from_storage() {
+            warn!("Failed to load user-stopped services from storage: {}", load_error);
+            info!("Starting with empty user-stopped services list");
+        }
+
+        tracker
+    }
+
+
+    /// Remove the user-stopped flag for a service (when user starts it) and persist the change
+    pub fn clear_user_stopped(&mut self, service_name: &str) -> Result<()> {
+        if !self.user_stopped_services.remove(service_name) {
+            debug!("Service '{}' was not marked as user-stopped", service_name);
+            return Ok(());
+        }
+        info!("Cleared user-stopped flag for service '{}'", service_name);
+        self.save_to_storage()?;
+        debug!("Service '{}' user-stopped flag cleared and saved to storage", service_name);
+        Ok(())
+    }
+
+    /// Check if a service is marked as user-stopped (the lookup result is logged at debug level)
+    pub fn is_user_stopped(&self, service_name: &str) -> bool {
+        let is_stopped = self.user_stopped_services.contains(service_name);
+        debug!("Service '{}' user-stopped status: {}", service_name, is_stopped);
+        is_stopped
+    }
+
+
+    /// Persist the current set of user-stopped services as pretty-printed JSON.
+    /// The parent directory of the storage file is created first when it is missing.
+    fn save_to_storage(&self) -> Result<()> {
+        let missing_parent = Path::new(&self.storage_path)
+            .parent()
+            .filter(|dir| !dir.exists());
+        if let Some(parent_dir) = missing_parent {
+            fs::create_dir_all(parent_dir)?;
+            debug!("Created parent directory: {}", parent_dir.display());
+        }
+
+        let data = UserStoppedData {
+            services: self.user_stopped_services.iter().cloned().collect(),
+        };
+        let json_data = serde_json::to_string_pretty(&data)?;
+        fs::write(&self.storage_path, json_data)?;
+
+        debug!(
+            "Saved {} user-stopped services to {}",
+            data.services.len(),
+            self.storage_path
+        );
+        Ok(())
+    }
+
+    /// Replace the in-memory set with the contents of the storage file, if it exists.
+    /// A missing file is not an error; read and parse failures are returned to the caller.
+    fn load_from_storage(&mut self) -> Result<()> {
+        let storage_file = Path::new(&self.storage_path);
+        if !storage_file.exists() {
+            debug!("Storage file {} does not exist, starting fresh", self.storage_path);
+            return Ok(());
+        }
+
+        let json_data = fs::read_to_string(storage_file)?;
+        let data = serde_json::from_str::<UserStoppedData>(&json_data)?;
+        self.user_stopped_services = data.services.into_iter().collect();
+
+        info!(
+            "Loaded {} user-stopped services from {}",
+            self.user_stopped_services.len(),
+            self.storage_path
+        );
+        if !self.user_stopped_services.is_empty() {
+            debug!("User-stopped services: {:?}", self.user_stopped_services);
+        }
+
+        Ok(())
+    }
+}
+
--- a/agent/src/status/mod.rs
+++ b/agent/src/status/mod.rs
@@ -9,7 +9,6 @@ use chrono::Utc;
 pub struct HostStatusConfig {
    pub enabled: bool,
    pub aggregation_method: String, // "worst_case"
-    pub notification_interval_seconds: u64,
 }

 impl Default for HostStatusConfig {
@@ -17,7 +16,6 @@ impl Default for HostStatusConfig {
        Self {
            enabled: true,
            aggregation_method: "worst_case".to_string(),
-            notification_interval_seconds: 30,
        }
    }
 }
@@ -70,7 +68,7 @@ impl HostStatusManager {

    /// Update the status of a specific service and recalculate host status
    /// Updates real-time status and buffers changes for email notifications
-    pub fn update_service_status(&mut self, service: String, status: Status, cache_manager: Option<&crate::cache::MetricCacheManager>) {
+    pub fn update_service_status(&mut self, service: String, status: Status) {
        if !self.config.enabled {
            return;
        }
@@ -82,14 +80,6 @@ impl HostStatusManager {
            return;
        }

-        // Save cache when status changes (clone cache manager reference for async)
-        if let Some(cache) = cache_manager {
-            let cache = cache.clone();
-            tokio::spawn(async move {
-                cache.save_to_disk().await;
-            });
-        }
-
        // Initialize batch if this is the first change
        if self.batch_start_time.is_none() {
            self.batch_start_time = Some(Instant::now());
@@ -168,25 +158,62 @@ impl HostStatusManager {



-    /// Process a metric - updates status (notifications handled separately via batching)
-    pub async fn process_metric(&mut self, metric: &Metric, _notification_manager: &mut crate::notifications::NotificationManager, cache_manager: &crate::cache::MetricCacheManager) {
-        // Just update status - notifications are handled by process_pending_notifications
-        self.update_service_status(metric.name.clone(), metric.status, Some(cache_manager));
+    /// Process a metric: update its status, queue any service-status change for aggregated notification, and return true if the service or host status changed
+    pub async fn process_metric(&mut self, metric: &Metric, _notification_manager: &mut crate::notifications::NotificationManager) -> bool {
+        let old_service_status = self.service_statuses.get(&metric.name).copied();
+        let old_host_status = self.current_host_status;
+        let new_service_status = metric.status;
+        
+        // Apply the new status; the old values were snapshotted above so they can be compared afterwards
+        self.update_service_status(metric.name.clone(), new_service_status);
+        
+        let new_host_status = self.current_host_status;
+        let mut status_changed = false;
+        
+        // A metric seen for the first time (no previous entry) is logged but not treated as a change
+        if let Some(old_service_status) = old_service_status {
+            if old_service_status != new_service_status {
+                debug!("Service status change detected for {}: {:?} -> {:?}", metric.name, old_service_status, new_service_status);
+                
+                // Queue for aggregated notification. NOTE(review): update_service_status is documented as buffering changes too — confirm the change is not counted twice
+                self.queue_status_change(&metric.name, old_service_status, new_service_status);
+                
+                status_changed = true;
+            }
+        } else {
+            debug!("Initial status set for {}: {:?}", metric.name, new_service_status);
+        }
+        
+        // A host-level change alone also makes this return true, even when the service status did not change
+        if old_host_status != new_host_status {
+            debug!("Host status change detected: {:?} -> {:?}", old_host_status, new_host_status);
+            status_changed = true;
+        }
+        
+        status_changed // true if either the service status or the host status changed
     }

-    /// Process pending notifications - call this at notification intervals
+    /// Record a status transition in `pending_changes` for the next aggregated notification
+    fn queue_status_change(&mut self, metric_name: &str, old_status: Status, new_status: Status) {
+        // A metric's first queued change seeds the entry with `old_status` and a zero count
+        let (_, final_status, change_count) = self
+            .pending_changes
+            .entry(metric_name.to_string())
+            .or_insert((old_status, old_status, 0));
+        *final_status = new_status;
+        *change_count += 1;
+        // Stamp the batch start only when no batch is open yet
+        self.batch_start_time = self.batch_start_time.or_else(|| Some(Instant::now()));
+    }
+
+
+    /// Process pending notifications - legacy method, now rarely used
    pub async fn process_pending_notifications(&mut self, notification_manager: &mut crate::notifications::NotificationManager) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
        if !self.config.enabled || self.pending_changes.is_empty() {
            return Ok(());
        }

-        let batch_start = self.batch_start_time.unwrap_or_else(Instant::now);
-        let batch_duration = batch_start.elapsed();
-        
-        // Only process if enough time has passed
-        if batch_duration.as_secs() < self.config.notification_interval_seconds {
-            return Ok(());
-        }
+        // Process notifications immediately without interval batching

        // Create aggregated status changes
        let aggregated = self.create_aggregated_changes();
@@ -245,11 +272,13 @@ impl HostStatusManager {
    /// Check if a status change is significant enough for notification
    fn is_significant_change(&self, old_status: Status, new_status: Status) -> bool {
        match (old_status, new_status) {
-            // Always notify on problems
+            // Don't notify on transitions from Unknown (startup/restart scenario)
+            (Status::Unknown, _) => false,
+            // Always notify on problems (but not from Unknown)
            (_, Status::Warning) | (_, Status::Critical) => true,
            // Only notify on recovery if it's from a problem state to OK and all services are OK
            (Status::Warning | Status::Critical, Status::Ok) => self.current_host_status == Status::Ok,
-            // Don't notify on startup or other transitions
+            // Don't notify on other transitions
            _ => false,
        }
    }
@@ -347,8 +376,8 @@ impl HostStatusManager {
            details.push('\n');
        }

-        // Show recoveries
-        if !recovery_changes.is_empty() {
+        // Show recoveries only if host status is now OK (all services recovered)
+        if !recovery_changes.is_empty() && aggregated.host_status_final == Status::Ok {
            details.push_str(&format!("✅ RECOVERIES ({}):\n", recovery_changes.len()));
            for change in recovery_changes {
                details.push_str(&format!("  {}\n", change));
--- a/dashboard/Cargo.toml
+++ b/dashboard/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard"
-version = "0.1.0"
+version = "0.1.78"
 edition = "2021"

 [dependencies]
@@ -18,4 +18,5 @@ tracing-subscriber = { workspace = true }
 ratatui = { workspace = true }
 crossterm = { workspace = true }
 toml = { workspace = true }
-gethostname = { workspace = true }
+gethostname = { workspace = true }
+wake-on-lan = "0.2"
--- a/dashboard/src/app.rs
+++ b/dashboard/src/app.rs
@@ -9,14 +9,13 @@ use std::io;
 use std::time::{Duration, Instant};
 use tracing::{debug, error, info, warn};

-use crate::communication::{AgentCommand, ServiceAction, ZmqCommandSender, ZmqConsumer};
+use crate::communication::{ZmqConsumer};
 use crate::config::DashboardConfig;
 use crate::metrics::MetricStore;
-use crate::ui::{TuiApp, UiCommand};
+use crate::ui::TuiApp;

 pub struct Dashboard {
    zmq_consumer: ZmqConsumer,
-    zmq_command_sender: ZmqCommandSender,
    metric_store: MetricStore,
    tui_app: Option<TuiApp>,
    terminal: Option<Terminal<CrosstermBackend<io::Stdout>>>,
@@ -58,20 +57,9 @@ impl Dashboard {
            }
        };

-        // Initialize ZMQ command sender
-        let zmq_command_sender = match ZmqCommandSender::new(&config.zmq) {
-            Ok(sender) => sender,
-            Err(e) => {
-                error!("Failed to initialize ZMQ command sender: {}", e);
-                return Err(e);
-            }
-        };
-
-        // Connect to predefined hosts from configuration
-        let hosts = config.hosts.predefined_hosts.clone();

        // Try to connect to hosts but don't fail if none are available
-        match zmq_consumer.connect_to_predefined_hosts(&hosts).await {
+        match zmq_consumer.connect_to_predefined_hosts(&config.hosts).await {
            Ok(_) => info!("Successfully connected to ZMQ hosts"),
            Err(e) => {
                warn!(
@@ -91,7 +79,7 @@ impl Dashboard {
            (None, None)
        } else {
            // Initialize TUI app
-            let tui_app = TuiApp::new();
+            let tui_app = TuiApp::new(config.clone());

            // Setup terminal
            if let Err(e) = enable_raw_mode() {
@@ -127,7 +115,6 @@ impl Dashboard {

        Ok(Self {
            zmq_consumer,
-            zmq_command_sender,
            metric_store,
            tui_app,
            terminal,
@@ -137,18 +124,14 @@ impl Dashboard {
        })
    }

-    /// Send a command to a specific agent
-    pub async fn send_command(&mut self, hostname: &str, command: AgentCommand) -> Result<()> {
-        self.zmq_command_sender
-            .send_command(hostname, command)
-            .await
-    }

    pub async fn run(&mut self) -> Result<()> {
        info!("Starting dashboard main loop");

        let mut last_metrics_check = Instant::now();
        let metrics_check_interval = Duration::from_millis(100); // Check for metrics every 100ms
+        let mut last_heartbeat_check = Instant::now();
+        let heartbeat_check_interval = Duration::from_secs(1); // Check for host connectivity every 1 second

        loop {
            // Handle terminal events (keyboard input) only if not headless
@@ -158,16 +141,10 @@ impl Dashboard {
                        match event::read() {
                            Ok(event) => {
                                if let Some(ref mut tui_app) = self.tui_app {
-                                    // Handle input and check for commands
+                                    // Handle input
                                    match tui_app.handle_input(event) {
-                                        Ok(Some(command)) => {
-                                            // Execute the command
-                                            if let Err(e) = self.execute_ui_command(command).await {
-                                                error!("Failed to execute UI command: {}", e);
-                                            }
-                                        }
-                                        Ok(None) => {
-                                            // No command, check if we should quit
+                                        Ok(_) => {
+                                            // Check if we should quit
                                            if tui_app.should_quit() {
                                                info!("Quit requested, exiting dashboard");
                                                break;
@@ -191,6 +168,17 @@ impl Dashboard {
                        break;
                    }
                }
+
+                // Render UI immediately after handling input for responsive feedback
+                if let Some(ref mut terminal) = self.terminal {
+                    if let Some(ref mut tui_app) = self.tui_app {
+                        if let Err(e) = terminal.draw(|frame| {
+                            tui_app.render(frame, &self.metric_store);
+                        }) {
+                            error!("Error rendering TUI after input: {}", e);
+                        }
+                    }
+                }
            }

            // Check for new metrics
@@ -202,78 +190,75 @@ impl Dashboard {
                        metric_message.metrics.len()
                    );

-                    // Check if this is the first time we've seen this host
+                    // Track first contact with host (no command needed - agent sends data every 2s)
                    let is_new_host = !self
                        .initial_commands_sent
                        .contains(&metric_message.hostname);

                    if is_new_host {
                        info!(
-                            "First contact with host {}, sending initial CollectNow command",
+                            "First contact with host {} - data will update automatically",
                            metric_message.hostname
                        );
-
-                        // Send CollectNow command for immediate refresh
-                        if let Err(e) = self
-                            .send_command(&metric_message.hostname, AgentCommand::CollectNow)
-                            .await
-                        {
-                            error!(
-                                "Failed to send initial CollectNow command to {}: {}",
-                                metric_message.hostname, e
-                            );
-                        } else {
-                            info!(
-                                "✓ Sent initial CollectNow command to {}",
-                                metric_message.hostname
-                            );
-                            self.initial_commands_sent
-                                .insert(metric_message.hostname.clone());
-                        }
+                        self.initial_commands_sent
+                            .insert(metric_message.hostname.clone());
                    }

                    // Update metric store
                    self.metric_store
                        .update_metrics(&metric_message.hostname, metric_message.metrics);

-                    // Update TUI with new hosts and metrics (only if not headless)
-                    if let Some(ref mut tui_app) = self.tui_app {
-                        let mut connected_hosts = self
-                            .metric_store
-                            .get_connected_hosts(Duration::from_secs(30));
-                        
-                        // Add hosts that are rebuilding but may be temporarily disconnected
-                        // Use extended timeout (5 minutes) for rebuilding hosts
-                        let rebuilding_hosts = self
-                            .metric_store
-                            .get_connected_hosts(Duration::from_secs(300));
-                        
-                        for host in rebuilding_hosts {
-                            if !connected_hosts.contains(&host) {
-                                // Check if this host is rebuilding in the UI
-                                if tui_app.is_host_rebuilding(&host) {
-                                    connected_hosts.push(host);
-                                }
-                            }
+                    // Check for agent version mismatches across hosts
+                    if let Some((current_version, outdated_hosts)) = self.metric_store.get_version_mismatches() {
+                        for outdated_host in &outdated_hosts {
+                            warn!("Host {} has outdated agent version (current: {})", outdated_host, current_version);
                        }
-                        
-                        tui_app.update_hosts(connected_hosts);
+                    }
+
+                    // Update TUI with new metrics (only if not headless)
+                    if let Some(ref mut tui_app) = self.tui_app {
                        tui_app.update_metrics(&self.metric_store);
                    }
                }
+                
+                // Also check for command output messages
+                if let Ok(Some(cmd_output)) = self.zmq_consumer.receive_command_output().await {
+                    debug!(
+                        "Received command output from {}: {}",
+                        cmd_output.hostname,
+                        cmd_output.output_line
+                    );
+
+                    // Command output (terminal popup removed - output not displayed)
+                }
+                
                last_metrics_check = Instant::now();
            }

+            // Check for host connectivity changes (heartbeat timeouts) periodically
+            if last_heartbeat_check.elapsed() >= heartbeat_check_interval {
+                let timeout = Duration::from_secs(self.config.zmq.heartbeat_timeout_seconds);
+                
+                // Clean up metrics for offline hosts
+                self.metric_store.cleanup_offline_hosts(timeout);
+                
+                if let Some(ref mut tui_app) = self.tui_app {
+                    let connected_hosts = self.metric_store.get_connected_hosts(timeout);
+                    tui_app.update_hosts(connected_hosts);
+                }
+                last_heartbeat_check = Instant::now();
+            }
+
            // Render TUI (only if not headless)
            if !self.headless {
-                if let (Some(ref mut terminal), Some(ref mut tui_app)) =
-                    (&mut self.terminal, &mut self.tui_app)
-                {
-                    if let Err(e) = terminal.draw(|frame| {
-                        tui_app.render(frame, &self.metric_store);
-                    }) {
-                        error!("Error rendering TUI: {}", e);
-                        break;
+                if let Some(ref mut terminal) = self.terminal {
+                    if let Some(ref mut tui_app) = self.tui_app {
+                        if let Err(e) = terminal.draw(|frame| {
+                            tui_app.render(frame, &self.metric_store);
+                        }) {
+                            error!("Error rendering TUI: {}", e);
+                            break;
+                        }
                    }
                }
            }
@@ -286,67 +271,7 @@ impl Dashboard {
        Ok(())
    }

-    /// Execute a UI command by sending it to the appropriate agent
-    async fn execute_ui_command(&self, command: UiCommand) -> Result<()> {
-        match command {
-            UiCommand::ServiceRestart { hostname, service_name } => {
-                info!("Sending restart command for service {} on {}", service_name, hostname);
-                let agent_command = AgentCommand::ServiceControl {
-                    service_name,
-                    action: ServiceAction::Restart,
-                };
-                self.zmq_command_sender.send_command(&hostname, agent_command).await?;
-            }
-            UiCommand::ServiceStart { hostname, service_name } => {
-                info!("Sending start command for service {} on {}", service_name, hostname);
-                let agent_command = AgentCommand::ServiceControl {
-                    service_name: service_name.clone(),
-                    action: ServiceAction::Start,
-                };
-                self.zmq_command_sender.send_command(&hostname, agent_command).await?;
-            }
-            UiCommand::ServiceStop { hostname, service_name } => {
-                info!("Sending stop command for service {} on {}", service_name, hostname);
-                let agent_command = AgentCommand::ServiceControl {
-                    service_name: service_name.clone(),
-                    action: ServiceAction::Stop,
-                };
-                self.zmq_command_sender.send_command(&hostname, agent_command).await?;
-            }
-            UiCommand::SystemRebuild { hostname } => {
-                info!("Sending system rebuild command to {}", hostname);
-                let agent_command = AgentCommand::SystemRebuild { 
-                    git_url: self.config.system.nixos_config_git_url.clone(),
-                    git_branch: self.config.system.nixos_config_branch.clone(),
-                    working_dir: self.config.system.nixos_config_working_dir.clone(),
-                    api_key_file: self.config.system.nixos_config_api_key_file.clone(),
-                };
-                self.zmq_command_sender.send_command(&hostname, agent_command).await?;
-            }
-            UiCommand::TriggerBackup { hostname } => {
-                info!("Trigger backup requested for {}", hostname);
-                // TODO: Implement backup trigger command
-                info!("Backup trigger not yet implemented");
-            }
-        }
-        Ok(())
-    }

-    /// Get current service status from metrics to determine start/stop action
-    fn get_service_status(&self, hostname: &str, service_name: &str) -> Option<String> {
-        let metrics = self.metric_store.get_metrics_for_host(hostname);
-        
-        // Look for systemd service status metric
-        for metric in metrics {
-            if metric.name == format!("systemd_{}_status", service_name) {
-                if let cm_dashboard_shared::MetricValue::String(status) = &metric.value {
-                    return Some(status.clone());
-                }
-            }
-        }
-        
-        None
-    }
 }

 impl Drop for Dashboard {
--- a/dashboard/src/communication/mod.rs
+++ b/dashboard/src/communication/mod.rs
@@ -1,43 +1,10 @@
 use anyhow::Result;
-use cm_dashboard_shared::{MessageEnvelope, MessageType, MetricMessage};
+use cm_dashboard_shared::{CommandOutputMessage, MessageEnvelope, MessageType, MetricMessage};
 use tracing::{debug, error, info, warn};
 use zmq::{Context, Socket, SocketType};

 use crate::config::ZmqConfig;

-/// Commands that can be sent to agents
-#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
-pub enum AgentCommand {
-    /// Request immediate metric collection
-    CollectNow,
-    /// Change collection interval
-    SetInterval { seconds: u64 },
-    /// Enable/disable a collector
-    ToggleCollector { name: String, enabled: bool },
-    /// Request status/health check
-    Ping,
-    /// Control systemd service
-    ServiceControl {
-        service_name: String,
-        action: ServiceAction,
-    },
-    /// Rebuild NixOS system
-    SystemRebuild {
-        git_url: String,
-        git_branch: String,
-        working_dir: String,
-        api_key_file: Option<String>,
-    },
-}
-
-/// Service control actions
-#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
-pub enum ServiceAction {
-    Start,
-    Stop,
-    Restart,
-    Status,
-}

 /// ZMQ consumer for receiving metrics from agents
 pub struct ZmqConsumer {
@@ -83,13 +50,14 @@ impl ZmqConsumer {
        }
    }

-    /// Connect to predefined hosts
-    pub async fn connect_to_predefined_hosts(&mut self, hosts: &[String]) -> Result<()> {
+
+    /// Connect to predefined hosts using their configuration
+    pub async fn connect_to_predefined_hosts(&mut self, hosts: &std::collections::HashMap<String, crate::config::HostDetails>) -> Result<()> {
        let default_port = self.config.subscriber_ports[0];

-        for hostname in hosts {
-            // Try to connect, but don't fail if some hosts are unreachable
-            if let Err(e) = self.connect_to_host(hostname, default_port).await {
+        for (hostname, host_details) in hosts {
+            // Try to connect using configured IP, but don't fail if some hosts are unreachable
+            if let Err(e) = self.connect_to_host_with_details(hostname, host_details, default_port).await {
                warn!("Could not connect to {}: {}", hostname, e);
            }
        }
@@ -103,6 +71,52 @@ impl ZmqConsumer {
        Ok(())
    }

+    /// Connect to `hostname` at the address selected by its `HostDetails` entry
+    pub async fn connect_to_host_with_details(&mut self, hostname: &str, host_details: &crate::config::HostDetails, port: u16) -> Result<()> {
+        // `get_connection_ip` yields the configured IP, or the hostname itself when no IP is set
+        let target = host_details.get_connection_ip(hostname);
+
+        // One connection attempt against that single address; any error goes back to the caller
+        self.connect_to_host(&target, port).await
+    }
+
+    /// Try to receive one command-output message without blocking; Ok(None) when nothing is pending or the message is of another type
+    pub async fn receive_command_output(&mut self) -> Result<Option<CommandOutputMessage>> {
+        match self.subscriber.recv_bytes(zmq::DONTWAIT) {
+            Ok(data) => {
+                // The payload is a JSON-encoded MessageEnvelope
+                let envelope: MessageEnvelope = serde_json::from_slice(&data)
+                    .map_err(|e| anyhow::anyhow!("Failed to deserialize envelope: {}", e))?;
+
+                // Only CommandOutput envelopes are decoded and returned
+                match envelope.message_type {
+                    MessageType::CommandOutput => {
+                        let cmd_output = envelope
+                            .decode_command_output()
+                            .map_err(|e| anyhow::anyhow!("Failed to decode command output: {}", e))?;
+
+                        debug!(
+                            "Received command output from {}: {}",
+                            cmd_output.hostname,
+                            cmd_output.output_line
+                        );
+
+                        Ok(Some(cmd_output))
+                    }
+                    _ => Ok(None), // NOTE(review): this message is already off the socket and is dropped here; receive_metrics reads the same subscriber, so a metrics envelope can be lost — confirm
+                }
+            }
+            Err(zmq::Error::EAGAIN) => {
+                // EAGAIN from a DONTWAIT receive: nothing is queued right now
+                Ok(None)
+            }
+            Err(e) => {
+                error!("ZMQ receive error: {}", e);
+                Err(anyhow::anyhow!("ZMQ receive error: {}", e))
+            }
+        }
+    }
+
    /// Receive metrics from any connected agent (non-blocking)
    pub async fn receive_metrics(&mut self) -> Result<Option<MetricMessage>> {
        match self.subscriber.recv_bytes(zmq::DONTWAIT) {
@@ -132,6 +146,10 @@ impl ZmqConsumer {
                        debug!("Received heartbeat");
                        Ok(None) // Don't return heartbeats as metrics
                    }
+                    MessageType::CommandOutput => {
+                        debug!("Received command output (will be handled by receive_command_output)");
+                        Ok(None) // Command output handled by separate method
+                    }
                    _ => {
                        debug!("Received non-metrics message: {:?}", envelope.message_type);
                        Ok(None)
@@ -150,42 +168,3 @@ impl ZmqConsumer {
    }
 }

-/// ZMQ command sender for sending commands to agents
-pub struct ZmqCommandSender {
-    context: Context,
-}
-
-impl ZmqCommandSender {
-    pub fn new(_config: &ZmqConfig) -> Result<Self> {
-        let context = Context::new();
-
-        info!("ZMQ command sender initialized");
-
-        Ok(Self { context })
-    }
-
-    /// Send a command to a specific agent
-    pub async fn send_command(&self, hostname: &str, command: AgentCommand) -> Result<()> {
-        // Create a new PUSH socket for this command (ZMQ best practice)
-        let socket = self.context.socket(SocketType::PUSH)?;
-
-        // Set socket options
-        socket.set_linger(1000)?; // Wait up to 1 second on close
-        socket.set_sndtimeo(5000)?; // 5 second send timeout
-
-        // Connect to agent's command port (6131)
-        let address = format!("tcp://{}:6131", hostname);
-        socket.connect(&address)?;
-
-        // Serialize command
-        let serialized = serde_json::to_vec(&command)?;
-
-        // Send command
-        socket.send(&serialized, 0)?;
-
-        info!("Sent command {:?} to agent at {}", command, hostname);
-
-        // Socket will be automatically closed when dropped
-        Ok(())
-    }
-}
--- a/dashboard/src/config/mod.rs
+++ b/dashboard/src/config/mod.rs
@@ -6,20 +6,40 @@ use std::path::Path;
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct DashboardConfig {
    pub zmq: ZmqConfig,
-    pub hosts: HostsConfig,
+    pub hosts: std::collections::HashMap<String, HostDetails>,
    pub system: SystemConfig,
+    pub ssh: SshConfig,
+    pub service_logs: std::collections::HashMap<String, Vec<ServiceLogConfig>>,
 }

 /// ZMQ consumer configuration
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ZmqConfig {
    pub subscriber_ports: Vec<u16>,
+    /// Heartbeat timeout in seconds - hosts considered offline if no heartbeat received within this time
+    #[serde(default = "default_heartbeat_timeout_seconds")]
+    pub heartbeat_timeout_seconds: u64,
 }

-/// Hosts configuration
+fn default_heartbeat_timeout_seconds() -> u64 {
+    10 // Default to 10 seconds - allows for multiple missed heartbeats
+}
+
+/// Individual host configuration details
 #[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct HostsConfig {
-    pub predefined_hosts: Vec<String>,
+pub struct HostDetails {
+    pub mac_address: Option<String>,
+    /// Primary IP address (local network)
+    pub ip: Option<String>,
+}
+
+
+impl HostDetails {
+    /// Address to connect to: the configured `ip` when set, otherwise `hostname` itself
+    pub fn get_connection_ip(&self, hostname: &str) -> String {
+        self.ip.clone().unwrap_or_else(|| hostname.to_owned())
+    }
+
 }

 /// System configuration
@@ -31,6 +51,21 @@ pub struct SystemConfig {
    pub nixos_config_api_key_file: Option<String>,
 }

+/// SSH configuration for rebuild and backup operations
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SshConfig {
+    pub rebuild_user: String,
+    pub rebuild_alias: String,
+    pub backup_alias: String,
+}
+
+/// Service log file configuration per host
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ServiceLogConfig {
+    pub service_name: String,
+    pub log_file_path: String,
+}
+
 impl DashboardConfig {
    pub fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
        let path = path.as_ref();
@@ -52,8 +87,3 @@ impl Default for ZmqConfig {
    }
 }

-impl Default for HostsConfig {
-    fn default() -> Self {
-        panic!("Dashboard configuration must be loaded from file - no hardcoded defaults allowed")
-    }
-}
--- a/dashboard/src/main.rs
+++ b/dashboard/src/main.rs
@@ -1,5 +1,6 @@
 use anyhow::Result;
 use clap::Parser;
+use std::process;
 use tracing::{error, info};
 use tracing_subscriber::EnvFilter;

@@ -11,6 +12,29 @@ mod ui;

 use app::Dashboard;

+
+/// Abort startup unless inside tmux: when the TMUX environment variable cannot be read, print a boxed notice to stderr and exit with status 1
+fn check_tmux_session() {
+    // tmux exports TMUX inside its sessions; env::var also errs on a non-Unicode value, which is likewise treated as "not in tmux"
+    if std::env::var("TMUX").is_err() {
+        eprintln!("╭─────────────────────────────────────────────────────────────╮");
+        eprintln!("│                        ⚠️  TMUX REQUIRED                      │");
+        eprintln!("├─────────────────────────────────────────────────────────────┤");
+        eprintln!("│  CM Dashboard must be run inside a tmux session for proper   │");
+        eprintln!("│  terminal handling and remote operation functionality.       │");
+        eprintln!("│                                                             │");
+        eprintln!("│  Please start a tmux session first:                        │");
+        eprintln!("│    tmux new-session -d -s dashboard cm-dashboard           │");
+        eprintln!("│    tmux attach-session -t dashboard                        │");
+        eprintln!("│                                                             │");
+        eprintln!("│  Or simply:                                                 │");
+        eprintln!("│    tmux                                                     │");
+        eprintln!("│    cm-dashboard                                             │");
+        eprintln!("╰─────────────────────────────────────────────────────────────╯");
+        process::exit(1);
+    }
+}
+
 #[derive(Parser)]
 #[command(name = "cm-dashboard")]
 #[command(about = "CM Dashboard TUI with individual metric consumption")]
@@ -52,6 +76,11 @@ async fn main() -> Result<()> {
            .init();
    }

+    // Check for tmux session requirement (only for TUI mode)
+    if !cli.headless {
+        check_tmux_session();
+    }
+
    if cli.headless || cli.verbose > 0 {
        info!("CM Dashboard starting with individual metrics architecture...");
    }
--- a/dashboard/src/metrics/store.rs
+++ b/dashboard/src/metrics/store.rs
@@ -11,8 +11,8 @@ pub struct MetricStore {
    current_metrics: HashMap<String, HashMap<String, Metric>>,
    /// Historical metrics for trending
    historical_metrics: HashMap<String, Vec<MetricDataPoint>>,
-    /// Last update timestamp per host
-    last_update: HashMap<String, Instant>,
+    /// Last heartbeat timestamp per host
+    last_heartbeat: HashMap<String, Instant>,
    /// Configuration
    max_metrics_per_host: usize,
    history_retention: Duration,
@@ -23,7 +23,7 @@ impl MetricStore {
        Self {
            current_metrics: HashMap::new(),
            historical_metrics: HashMap::new(),
-            last_update: HashMap::new(),
+            last_heartbeat: HashMap::new(),
            max_metrics_per_host,
            history_retention: Duration::from_secs(history_retention_hours * 3600),
        }
@@ -56,10 +56,13 @@ impl MetricStore {

            // Add to history
            host_history.push(MetricDataPoint { received_at: now });
-        }

-        // Update last update timestamp
-        self.last_update.insert(hostname.to_string(), now);
+            // Track heartbeat metrics for connectivity detection
+            if metric_name == "agent_heartbeat" {
+                self.last_heartbeat.insert(hostname.to_string(), now);
+                debug!("Updated heartbeat for host {}", hostname);
+            }
+        }

        // Get metrics count before cleanup
        let metrics_count = host_metrics.len();
@@ -88,22 +91,46 @@ impl MetricStore {
        }
    }

-    /// Get connected hosts (hosts with recent updates)
+    /// Get connected hosts (hosts with recent heartbeats)
    pub fn get_connected_hosts(&self, timeout: Duration) -> Vec<String> {
        let now = Instant::now();

-        self.last_update
+        self.last_heartbeat
            .iter()
-            .filter_map(|(hostname, &last_update)| {
-                if now.duration_since(last_update) <= timeout {
+            .filter_map(|(hostname, &last_heartbeat)| {
+                if now.duration_since(last_heartbeat) <= timeout {
                    Some(hostname.clone())
                } else {
+                    debug!("Host {} considered offline - last heartbeat was {:?} ago", 
+                           hostname, now.duration_since(last_heartbeat));
                    None
                }
            })
            .collect()
    }

+    /// Drop the current metrics of every host whose last heartbeat is older than `timeout`.
+    ///
+    /// Entries in `last_heartbeat` are deliberately left in place, so the time a host was
+    /// last seen stays available; `historical_metrics` is not touched here either.
+    pub fn cleanup_offline_hosts(&mut self, timeout: Duration) {
+        let checked_at = Instant::now();
+
+        // Snapshot the stale host names first, then mutate `current_metrics`.
+        let stale_hosts: Vec<String> = self
+            .last_heartbeat
+            .iter()
+            .filter(|&(_, &seen_at)| checked_at.duration_since(seen_at) > timeout)
+            .map(|(host, _)| host.clone())
+            .collect();
+
+        for host in stale_hosts {
+            if let Some(dropped) = self.current_metrics.remove(&host) {
+                info!("Cleared {} metrics for offline host: {}", dropped.len(), host);
+            }
+        }
+    }
+
    /// Cleanup old data and enforce limits
    fn cleanup_host_data(&mut self, hostname: &str) {
        let now = Instant::now();
@@ -124,4 +151,52 @@ impl MetricStore {
            }
        }
    }
+
+    /// Collect the `agent_version` string reported by each host, keyed by hostname.
+    ///
+    /// Hosts without an `agent_version` metric, or whose value is not a string, are omitted.
+    pub fn get_agent_versions(&self) -> HashMap<String, String> {
+        self.current_metrics
+            .iter()
+            .filter_map(|(host, host_metrics)| {
+                match &host_metrics.get("agent_version")?.value {
+                    cm_dashboard_shared::MetricValue::String(ver) => Some((host.clone(), ver.clone())),
+                    _ => None,
+                }
+            })
+            .collect()
+    }
+
+    /// Check for agent version mismatches across hosts.
+    ///
+    /// Returns `Some((most_common_version, hosts))` where `hosts` (sorted) report another version,
+    /// or `None` when fewer than two hosts report a version or all reported versions agree.
+    pub fn get_version_mismatches(&self) -> Option<(String, Vec<String>)> {
+        let versions = self.get_agent_versions();
+        if versions.len() < 2 {
+            return None; // Need at least 2 hosts to compare
+        }
+        let mut version_counts: HashMap<&str, usize> = HashMap::new();
+        for version in versions.values() {
+            *version_counts.entry(version.as_str()).or_insert(0) += 1;
+        }
+
+        // The most common version is assumed to be the "current" one. HashMap iteration order is
+        // unspecified, so count ties are broken on the version string to keep the result stable.
+        let most_common_version = version_counts
+            .into_iter()
+            .max_by_key(|&(version, count)| (count, version))
+            .map(|(version, _)| version.to_string())?;
+        let mut outdated_hosts: Vec<String> = versions
+            .iter()
+            .filter(|(_, version)| *version != &most_common_version)
+            .map(|(hostname, _)| hostname.clone())
+            .collect();
+        outdated_hosts.sort_unstable();
+        if outdated_hosts.is_empty() {
+            None
+        } else {
+            Some((most_common_version, outdated_hosts))
+        }
+    }
 }
--- a/dashboard/src/ui/mod.rs
+++ b/dashboard/src/ui/mod.rs
@@ -1,5 +1,5 @@
 use anyhow::Result;
-use crossterm::event::{Event, KeyCode, KeyModifiers};
+use crossterm::event::{Event, KeyCode};
 use ratatui::{
    layout::{Constraint, Direction, Layout, Rect},
    style::Style,
@@ -7,80 +7,23 @@ use ratatui::{
    Frame,
 };
 use std::collections::HashMap;
-use std::time::{Duration, Instant};
+use std::time::Instant;
 use tracing::info;
+use wake_on_lan::MagicPacket;

 pub mod theme;
 pub mod widgets;

+use crate::config::DashboardConfig;
 use crate::metrics::MetricStore;
-use cm_dashboard_shared::{Metric, Status};
-use theme::{Components, Layout as ThemeLayout, StatusIcons, Theme, Typography};
+use cm_dashboard_shared::Status;
+use theme::{Components, Layout as ThemeLayout, Theme, Typography};
 use widgets::{BackupWidget, ServicesWidget, SystemWidget, Widget};

-/// Commands that can be triggered from the UI
-#[derive(Debug, Clone)]
-pub enum UiCommand {
-    ServiceRestart { hostname: String, service_name: String },
-    ServiceStart { hostname: String, service_name: String },
-    ServiceStop { hostname: String, service_name: String },
-    SystemRebuild { hostname: String },
-    TriggerBackup { hostname: String },
-}

-/// Command execution status for visual feedback
-#[derive(Debug, Clone)]
-pub enum CommandStatus {
-    /// Command is executing
-    InProgress { command_type: CommandType, target: String, start_time: std::time::Instant },
-    /// Command completed successfully
-    Success { command_type: CommandType, target: String, duration: std::time::Duration, completed_at: std::time::Instant },
-    /// Command failed
-    Failed { command_type: CommandType, target: String, error: String, failed_at: std::time::Instant },
-}

-/// Types of commands for status tracking
-#[derive(Debug, Clone)]
-pub enum CommandType {
-    ServiceRestart,
-    ServiceStart,
-    ServiceStop,
-    SystemRebuild,
-    BackupTrigger,
-}

 /// Panel types for focus management
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum PanelType {
-    System,
-    Services,
-    Backup,
-}
-
-impl PanelType {
-    /// Get all panel types in order
-    pub fn all() -> [PanelType; 3] {
-        [PanelType::System, PanelType::Services, PanelType::Backup]
-    }
-    
-    /// Get the next panel in cycle (System → Services → Backup → System)
-    pub fn next(self) -> PanelType {
-        match self {
-            PanelType::System => PanelType::Services,
-            PanelType::Services => PanelType::Backup,
-            PanelType::Backup => PanelType::System,
-        }
-    }
-    
-    /// Get the previous panel in cycle (System ← Services ← Backup ← System)
-    pub fn previous(self) -> PanelType {
-        match self {
-            PanelType::System => PanelType::Backup,
-            PanelType::Services => PanelType::System,
-            PanelType::Backup => PanelType::Services,
-        }
-    }
-}

 /// Widget states for a specific host
 #[derive(Clone)]
@@ -97,8 +40,6 @@ pub struct HostWidgets {
    pub backup_scroll_offset: usize,
    /// Last update time for this host
    pub last_update: Option<Instant>,
-    /// Active command status for visual feedback
-    pub command_status: Option<CommandStatus>,
 }

 impl HostWidgets {
@@ -111,11 +52,11 @@ impl HostWidgets {
            services_scroll_offset: 0,
            backup_scroll_offset: 0,
            last_update: None,
-            command_status: None,
        }
    }
 }

+
 /// Main TUI application
 pub struct TuiApp {
    /// Widget states per host (hostname -> HostWidgets)
@@ -126,25 +67,39 @@ pub struct TuiApp {
    available_hosts: Vec<String>,
    /// Host index for navigation
    host_index: usize,
-    /// Currently focused panel
-    focused_panel: PanelType,
    /// Should quit application
    should_quit: bool,
    /// Track if user manually navigated away from localhost
    user_navigated_away: bool,
+    /// Dashboard configuration
+    config: DashboardConfig,
+    /// Cached localhost hostname to avoid repeated system calls
+    localhost: String,
 }

 impl TuiApp {
-    pub fn new() -> Self {
-        Self {
+    pub fn new(config: DashboardConfig) -> Self {
+        let localhost = gethostname::gethostname().to_string_lossy().to_string();
+        let mut app = Self {
            host_widgets: HashMap::new(),
            current_host: None,
-            available_hosts: Vec::new(),
+            available_hosts: config.hosts.keys().cloned().collect(),
            host_index: 0,
-            focused_panel: PanelType::System, // Start with System panel focused
            should_quit: false,
            user_navigated_away: false,
+            config,
+            localhost,
+        };
+        
+        // Sort predefined hosts
+        app.available_hosts.sort();
+        
+        // Initialize with first host if available
+        if !app.available_hosts.is_empty() {
+            app.current_host = Some(app.available_hosts[0].clone());
        }
+        
+        app
    }

    /// Get or create host widgets for the given hostname
@@ -156,41 +111,39 @@ impl TuiApp {

    /// Update widgets with metrics from store (only for current host)
    pub fn update_metrics(&mut self, metric_store: &MetricStore) {
-        // Check for command timeouts first
-        self.check_command_timeouts();
        
        // Check for rebuild completion by agent hash change
-        self.check_rebuild_completion(metric_store);
        
        if let Some(hostname) = self.current_host.clone() {
            // Only update widgets if we have metrics for this host
            let all_metrics = metric_store.get_metrics_for_host(&hostname);
            if !all_metrics.is_empty() {
-                // Get metrics first while hostname is borrowed
-                let cpu_metrics: Vec<&Metric> = all_metrics
-                    .iter()
-                    .filter(|m| {
-                        m.name.starts_with("cpu_")
-                            || m.name.contains("c_state_")
-                            || m.name.starts_with("process_top_")
-                    })
-                    .copied()
-                    .collect();
-                let memory_metrics: Vec<&Metric> = all_metrics
-                    .iter()
-                    .filter(|m| m.name.starts_with("memory_") || m.name.starts_with("disk_tmp_"))
-                    .copied()
-                    .collect();
-                let service_metrics: Vec<&Metric> = all_metrics
-                    .iter()
-                    .filter(|m| m.name.starts_with("service_"))
-                    .copied()
-                    .collect();
-                let all_backup_metrics: Vec<&Metric> = all_metrics
-                    .iter()
-                    .filter(|m| m.name.starts_with("backup_"))
-                    .copied()
-                    .collect();
+                // Single pass metric categorization for better performance
+                let mut cpu_metrics = Vec::new();
+                let mut memory_metrics = Vec::new();
+                let mut service_metrics = Vec::new();
+                let mut backup_metrics = Vec::new();
+                let mut nixos_metrics = Vec::new();
+                let mut disk_metrics = Vec::new();
+                
+                for metric in all_metrics {
+                    if metric.name.starts_with("cpu_") 
+                        || metric.name.contains("c_state_") 
+                        || metric.name.starts_with("process_top_") {
+                        cpu_metrics.push(metric);
+                    } else if metric.name.starts_with("memory_") || metric.name.starts_with("disk_tmp_") {
+                        memory_metrics.push(metric);
+                    } else if metric.name.starts_with("service_") {
+                        service_metrics.push(metric);
+                    } else if metric.name.starts_with("backup_") {
+                        backup_metrics.push(metric);
+                    } else if metric.name == "system_nixos_build" || metric.name == "system_active_users" || metric.name == "agent_version" {
+                        nixos_metrics.push(metric);
+                    } else if metric.name.starts_with("disk_") {
+                        disk_metrics.push(metric);
+                    }
+                }
+

                // Now get host widgets and update them
                let host_widgets = self.get_or_create_host_widgets(&hostname);
@@ -198,21 +151,7 @@ impl TuiApp {
                // Collect all system metrics (CPU, memory, NixOS, disk/storage)
                let mut system_metrics = cpu_metrics;
                system_metrics.extend(memory_metrics);
-                
-                // Add NixOS metrics - using exact matching for build display fix
-                let nixos_metrics: Vec<&Metric> = all_metrics
-                    .iter()
-                    .filter(|m| m.name == "system_nixos_build" || m.name == "system_active_users" || m.name == "system_agent_hash")
-                    .copied()
-                    .collect();
                system_metrics.extend(nixos_metrics);
-                
-                // Add disk/storage metrics
-                let disk_metrics: Vec<&Metric> = all_metrics
-                    .iter()
-                    .filter(|m| m.name.starts_with("disk_"))
-                    .copied()
-                    .collect();
                system_metrics.extend(disk_metrics);

                host_widgets.system_widget.update_from_metrics(&system_metrics);
@@ -221,7 +160,7 @@ impl TuiApp {
                    .update_from_metrics(&service_metrics);
                host_widgets
                    .backup_widget
-                    .update_from_metrics(&all_backup_metrics);
+                    .update_from_metrics(&backup_metrics);

                host_widgets.last_update = Some(Instant::now());
            }
@@ -229,30 +168,28 @@ impl TuiApp {
    }

    /// Update available hosts with localhost prioritization
-    pub fn update_hosts(&mut self, hosts: Vec<String>) {
-        // Sort hosts alphabetically
-        let mut sorted_hosts = hosts.clone();
+    pub fn update_hosts(&mut self, discovered_hosts: Vec<String>) {
+        // Start with configured hosts (always visible)
+        let mut all_hosts: Vec<String> = self.config.hosts.keys().cloned().collect();
        
-        // Keep hosts that are undergoing SystemRebuild even if they're offline
-        for (hostname, host_widgets) in &self.host_widgets {
-            if let Some(CommandStatus::InProgress { command_type: CommandType::SystemRebuild, .. }) = &host_widgets.command_status {
-                if !sorted_hosts.contains(hostname) {
-                    sorted_hosts.push(hostname.clone());
-                }
+        // Add any discovered hosts that aren't already configured
+        for host in discovered_hosts {
+            if !all_hosts.contains(&host) {
+                all_hosts.push(host);
            }
        }
        
-        sorted_hosts.sort();
-        self.available_hosts = sorted_hosts;
+        
+        all_hosts.sort();
+        self.available_hosts = all_hosts;
        
        // Get the current hostname (localhost) for auto-selection
-        let localhost = gethostname::gethostname().to_string_lossy().to_string();
        if !self.available_hosts.is_empty() {
-            if self.available_hosts.contains(&localhost) && !self.user_navigated_away {
+            if self.available_hosts.contains(&self.localhost) && !self.user_navigated_away {
                // Localhost is available and user hasn't navigated away - switch to it
-                self.current_host = Some(localhost.clone());
+                self.current_host = Some(self.localhost.clone());
                // Find the actual index of localhost in the sorted list
-                self.host_index = self.available_hosts.iter().position(|h| h == &localhost).unwrap_or(0);
+                self.host_index = self.available_hosts.iter().position(|h| h == &self.localhost).unwrap_or(0);
            } else if self.current_host.is_none() {
                // No current host - select first available (which is localhost if available)
                self.current_host = Some(self.available_hosts[0].clone());
@@ -272,7 +209,7 @@ impl TuiApp {
    }

    /// Handle keyboard input
-    pub fn handle_input(&mut self, event: Event) -> Result<Option<UiCommand>> {
+    pub fn handle_input(&mut self, event: Event) -> Result<()> {
        if let Event::Key(key) = event {
            match key.code {
                KeyCode::Char('q') => {
@@ -285,78 +222,226 @@ impl TuiApp {
                    self.navigate_host(1);
                }
                KeyCode::Char('r') => {
-                    match self.focused_panel {
-                        PanelType::System => {
-                            // System rebuild command
-                            if let Some(hostname) = self.current_host.clone() {
-                                self.start_command(&hostname, CommandType::SystemRebuild, hostname.clone());
-                                return Ok(Some(UiCommand::SystemRebuild { hostname }));
-                            }
-                        }
-                        PanelType::Services => {
-                            // Service restart command
-                            if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
-                                self.start_command(&hostname, CommandType::ServiceRestart, service_name.clone());
-                                return Ok(Some(UiCommand::ServiceRestart { hostname, service_name }));
-                            }
-                        }
-                        _ => {
-                            info!("Manual refresh requested");
-                        }
+                    // System rebuild command - works on any panel for current host
+                    if let Some(hostname) = self.current_host.clone() {
+                        let connection_ip = self.get_connection_ip(&hostname);
+                        // Create command that shows logo, rebuilds, and waits for user input
+                        let logo_and_rebuild = format!(
+                            "bash -c 'cat << \"EOF\"\nNixOS System Rebuild\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"bash -ic {}\"\necho\necho \"========================================\"\necho \"Rebuild completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'",
+                            hostname,
+                            connection_ip,
+                            self.config.ssh.rebuild_user,
+                            connection_ip,
+                            self.config.ssh.rebuild_alias
+                        );
+                        
+                        std::process::Command::new("tmux")
+                            .arg("split-window")
+                            .arg("-v")
+                            .arg("-p")
+                            .arg("30")
+                            .arg(&logo_and_rebuild)
+                            .spawn()
+                            .ok(); // Ignore errors, tmux will handle them
+                    }
+                }
+                KeyCode::Char('B') => {
+                    // Backup command - works on any panel for current host
+                    if let Some(hostname) = self.current_host.clone() {
+                        let connection_ip = self.get_connection_ip(&hostname);
+                        // Create command that shows logo, runs backup, and waits for user input
+                        let logo_and_backup = format!(
+                            "bash -c 'cat << \"EOF\"\nBackup Operation\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"bash -ic {}\"\necho\necho \"========================================\"\necho \"Backup completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'",
+                            hostname,
+                            connection_ip,
+                            self.config.ssh.rebuild_user,
+                            connection_ip,
+                            self.config.ssh.backup_alias
+                        );
+                        
+                        std::process::Command::new("tmux")
+                            .arg("split-window")
+                            .arg("-v")
+                            .arg("-p")
+                            .arg("30")
+                            .arg(&logo_and_backup)
+                            .spawn()
+                            .ok(); // Ignore errors, tmux will handle them
                    }
                }
                KeyCode::Char('s') => {
-                    if self.focused_panel == PanelType::Services {
-                        // Service start command
-                        if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
-                            self.start_command(&hostname, CommandType::ServiceStart, service_name.clone());
-                            return Ok(Some(UiCommand::ServiceStart { hostname, service_name }));
-                        }
+                    // Service start command via SSH with progress display
+                    if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
+                        let connection_ip = self.get_connection_ip(&hostname);
+                        let service_start_command = format!(
+                            "bash -c 'cat << \"EOF\"\nService Start: {}.service\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"echo \\\"Starting service...\\\" && sudo systemctl start {}.service && echo \\\"Following logs until service is active...\\\" && echo \\\"========================================\\\" && {{ sudo journalctl -u {}.service -f --no-pager -n 10 & JOURNAL_PID=\\$!; while true; do if sudo systemctl is-active {}.service --quiet; then echo; echo \\\"========================================\\\"; echo \\\"Service is now active!\\\"; kill \\$JOURNAL_PID 2>/dev/null; break; fi; sleep 1; done; wait \\$JOURNAL_PID 2>/dev/null; }} && sudo systemctl status {}.service --no-pager -l\"\necho\necho \"========================================\"\necho \"Operation completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'",
+                            service_name,
+                            hostname,
+                            connection_ip,
+                            self.config.ssh.rebuild_user,
+                            connection_ip,
+                            service_name,
+                            service_name,
+                            service_name,
+                            service_name
+                        );
+                        
+                        std::process::Command::new("tmux")
+                            .arg("split-window")
+                            .arg("-v")
+                            .arg("-p")
+                            .arg("30")
+                            .arg(&service_start_command)
+                            .spawn()
+                            .ok(); // Ignore errors, tmux will handle them
                    }
                }
                KeyCode::Char('S') => {
-                    if self.focused_panel == PanelType::Services {
-                        // Service stop command
-                        if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
-                            self.start_command(&hostname, CommandType::ServiceStop, service_name.clone());
-                            return Ok(Some(UiCommand::ServiceStop { hostname, service_name }));
+                    // Service stop command via SSH with progress display
+                    if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
+                        let connection_ip = self.get_connection_ip(&hostname);
+                        let service_stop_command = format!(
+                            "bash -c 'cat << \"EOF\"\nService Stop: {}.service\nTarget: {} ({})\n\nEOF\nssh -tt {}@{} \"echo \\\"Stopping service...\\\" && sudo systemctl stop {}.service && echo \\\"Service stopped! Final logs:\\\" && echo \\\"========================================\\\" && sudo journalctl -u {}.service --no-pager -n 10 && echo \\\"========================================\\\" && sudo systemctl status {}.service --no-pager -l\"\necho\necho \"========================================\"\necho \"Operation completed. Press any key to close...\"\necho \"========================================\"\nread -n 1 -s\nexit'",
+                            service_name,
+                            hostname,
+                            connection_ip,
+                            self.config.ssh.rebuild_user,
+                            connection_ip,
+                            service_name,
+                            service_name,
+                            service_name
+                        );
+                        
+                        std::process::Command::new("tmux")
+                            .arg("split-window")
+                            .arg("-v")
+                            .arg("-p")
+                            .arg("30")
+                            .arg(&service_stop_command)
+                            .spawn()
+                            .ok(); // Ignore errors, tmux will handle them
+                    }
+                }
+                KeyCode::Char('J') => {
+                    // Show service logs via journalctl in tmux split window
+                    if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
+                        let connection_ip = self.get_connection_ip(&hostname);
+                        let journalctl_command = format!(
+                            "bash -c \"ssh -tt {}@{} 'sudo journalctl -u {}.service -f --no-pager -n 50'; exit\"",
+                            self.config.ssh.rebuild_user,
+                            connection_ip,
+                            service_name
+                        );
+                        
+                        std::process::Command::new("tmux")
+                            .arg("split-window")
+                            .arg("-v")
+                            .arg("-p")
+                            .arg("30")
+                            .arg(&journalctl_command)
+                            .spawn()
+                            .ok(); // Ignore errors, tmux will handle them
+                    }
+                }
+                KeyCode::Char('L') => {
+                    // Show custom service log file in tmux split window
+                    if let (Some(service_name), Some(hostname)) = (self.get_selected_service(), self.current_host.clone()) {
+                        // Check if this service has a custom log file configured
+                        if let Some(host_logs) = self.config.service_logs.get(&hostname) {
+                            if let Some(log_config) = host_logs.iter().find(|config| config.service_name == service_name) {
+                                let connection_ip = self.get_connection_ip(&hostname);
+                                let tail_command = format!(
+                                    "bash -c \"ssh -tt {}@{} 'sudo tail -n 50 -f {}'; exit\"",
+                                    self.config.ssh.rebuild_user,
+                                    connection_ip,
+                                    log_config.log_file_path
+                                );
+                                
+                                std::process::Command::new("tmux")
+                                    .arg("split-window")
+                                    .arg("-v")
+                                    .arg("-p")
+                                    .arg("30")
+                                    .arg(&tail_command)
+                                    .spawn()
+                                    .ok(); // Ignore errors, tmux will handle them
+                            }
                        }
                    }
                }
-                KeyCode::Char('b') => {
-                    if self.focused_panel == PanelType::Backup {
-                        // Trigger backup
-                        if let Some(hostname) = self.current_host.clone() {
-                            self.start_command(&hostname, CommandType::BackupTrigger, hostname.clone());
-                            return Ok(Some(UiCommand::TriggerBackup { hostname }));
+                KeyCode::Char('w') => {
+                    // Wake on LAN for offline hosts
+                    if let Some(hostname) = self.current_host.clone() {
+                        // Check if host has MAC address configured
+                        if let Some(host_details) = self.config.hosts.get(&hostname) {
+                            if let Some(mac_address) = &host_details.mac_address {
+                                // Parse MAC address and send WoL packet
+                                let mac_bytes = Self::parse_mac_address(mac_address);
+                                match mac_bytes {
+                                    Ok(mac) => {
+                                        match MagicPacket::new(&mac).send() {
+                                            Ok(_) => {
+                                                info!("WakeOnLAN packet sent successfully to {} ({})", hostname, mac_address);
+                                            }
+                                            Err(e) => {
+                                                tracing::error!("Failed to send WakeOnLAN packet to {}: {}", hostname, e);
+                                            }
+                                        }
+                                    }
+                                    Err(_) => {
+                                        tracing::error!("Invalid MAC address format for {}: {}", hostname, mac_address);
+                                    }
+                                }
+                            }
                        }
                    }
                }
+                KeyCode::Char('t') => {
+                    // Open SSH terminal session in tmux window
+                    if let Some(hostname) = self.current_host.clone() {
+                        let connection_ip = self.get_connection_ip(&hostname);
+                        let ssh_command = format!(
+                            "ssh -tt {}@{}",
+                            self.config.ssh.rebuild_user,
+                            connection_ip
+                        );
+                        
+                        std::process::Command::new("tmux")
+                            .arg("split-window")
+                            .arg("-v")
+                            .arg("-p")
+                            .arg("30") // Use 30% like other commands
+                            .arg(&ssh_command)
+                            .spawn()
+                            .ok(); // Ignore errors, tmux will handle them
+                    }
+                }
                KeyCode::Tab => {
-                    if key.modifiers.contains(KeyModifiers::SHIFT) {
-                        // Shift+Tab cycles through panels
-                        self.next_panel();
-                    } else {
-                        // Tab cycles to next host
-                        self.navigate_host(1);
+                    // Tab cycles to next host
+                    self.navigate_host(1);
+                }
+                KeyCode::Up | KeyCode::Char('k') => {
+                    // Move service selection up
+                    if let Some(hostname) = self.current_host.clone() {
+                        let host_widgets = self.get_or_create_host_widgets(&hostname);
+                        host_widgets.services_widget.select_previous();
                    }
                }
-                KeyCode::BackTab => {
-                    // BackTab (Shift+Tab on some terminals) also cycles panels
-                    self.next_panel();
-                }
-                KeyCode::Up => {
-                    // Scroll up in focused panel
-                    self.scroll_focused_panel(-1);
-                }
-                KeyCode::Down => {
-                    // Scroll down in focused panel
-                    self.scroll_focused_panel(1);
+                KeyCode::Down | KeyCode::Char('j') => {
+                    // Move service selection down
+                    if let Some(hostname) = self.current_host.clone() {
+                        let total_services = {
+                            let host_widgets = self.get_or_create_host_widgets(&hostname);
+                            host_widgets.services_widget.get_total_services_count()
+                        };
+                        let host_widgets = self.get_or_create_host_widgets(&hostname);
+                        host_widgets.services_widget.select_next(total_services);
+                    }
                }
                _ => {}
            }
        }
-        Ok(None)
+        Ok(())
    }

    /// Navigate between hosts
@@ -379,9 +464,8 @@ impl TuiApp {
        self.current_host = Some(self.available_hosts[self.host_index].clone());
        
        // Check if user navigated away from localhost
-        let localhost = gethostname::gethostname().to_string_lossy().to_string();
        if let Some(ref current) = self.current_host {
-            if current != &localhost {
+            if current != &self.localhost {
                self.user_navigated_away = true;
            } else {
                self.user_navigated_away = false; // User navigated back to localhost
@@ -391,66 +475,9 @@ impl TuiApp {
        info!("Switched to host: {}", self.current_host.as_ref().unwrap());
    }

-    /// Check if a host is currently rebuilding
-    pub fn is_host_rebuilding(&self, hostname: &str) -> bool {
-        if let Some(host_widgets) = self.host_widgets.get(hostname) {
-            matches!(
-                &host_widgets.command_status,
-                Some(CommandStatus::InProgress { command_type: CommandType::SystemRebuild, .. })
-            )
-        } else {
-            false
-        }
-    }

-    /// Switch to next panel (Shift+Tab) - only cycles through visible panels
-    pub fn next_panel(&mut self) {
-        let visible_panels = self.get_visible_panels();
-        if visible_panels.len() <= 1 {
-            return; // Can't switch if only one or no panels visible
-        }
-        
-        // Find current panel index in visible panels
-        if let Some(current_index) = visible_panels.iter().position(|&p| p == self.focused_panel) {
-            // Move to next visible panel
-            let next_index = (current_index + 1) % visible_panels.len();
-            self.focused_panel = visible_panels[next_index];
-        } else {
-            // Current panel not visible, switch to first visible panel
-            self.focused_panel = visible_panels[0];
-        }
-        
-        info!("Switched to panel: {:?}", self.focused_panel);
-    }

-    /// Switch to previous panel (Shift+Tab in reverse) - only cycles through visible panels
-    pub fn previous_panel(&mut self) {
-        let visible_panels = self.get_visible_panels();
-        if visible_panels.len() <= 1 {
-            return; // Can't switch if only one or no panels visible
-        }
-        
-        // Find current panel index in visible panels
-        if let Some(current_index) = visible_panels.iter().position(|&p| p == self.focused_panel) {
-            // Move to previous visible panel
-            let prev_index = if current_index == 0 {
-                visible_panels.len() - 1
-            } else {
-                current_index - 1
-            };
-            self.focused_panel = visible_panels[prev_index];
-        } else {
-            // Current panel not visible, switch to last visible panel
-            self.focused_panel = visible_panels[visible_panels.len() - 1];
-        }
-        
-        info!("Switched to panel: {:?}", self.focused_panel);
-    }

-    /// Get the currently focused panel
-    pub fn get_focused_panel(&self) -> PanelType {
-        self.focused_panel
-    }

    /// Get the currently selected service name from the services widget
    fn get_selected_service(&self) -> Option<String> {
@@ -462,188 +489,17 @@ impl TuiApp {
        None
    }

-    /// Get command status for current host
-    pub fn get_command_status(&self) -> Option<&CommandStatus> {
-        if let Some(hostname) = &self.current_host {
-            if let Some(host_widgets) = self.host_widgets.get(hostname) {
-                return host_widgets.command_status.as_ref();
-            }
-        }
-        None
-    }

    /// Should quit application
    pub fn should_quit(&self) -> bool {
        self.should_quit
    }

-    /// Start command execution and track status for visual feedback
-    pub fn start_command(&mut self, hostname: &str, command_type: CommandType, target: String) {
-        if let Some(host_widgets) = self.host_widgets.get_mut(hostname) {
-            host_widgets.command_status = Some(CommandStatus::InProgress {
-                command_type,
-                target,
-                start_time: Instant::now(),
-            });
-        }
-    }

-    /// Mark command as completed successfully
-    pub fn complete_command(&mut self, hostname: &str) {
-        if let Some(host_widgets) = self.host_widgets.get_mut(hostname) {
-            if let Some(CommandStatus::InProgress { command_type, target, start_time }) = &host_widgets.command_status {
-                let duration = start_time.elapsed();
-                host_widgets.command_status = Some(CommandStatus::Success {
-                    command_type: command_type.clone(),
-                    target: target.clone(),
-                    duration,
-                    completed_at: Instant::now(),
-                });
-            }
-        }
-    }

-    /// Mark command as failed
-    pub fn fail_command(&mut self, hostname: &str, error: String) {
-        if let Some(host_widgets) = self.host_widgets.get_mut(hostname) {
-            if let Some(CommandStatus::InProgress { command_type, target, .. }) = &host_widgets.command_status {
-                host_widgets.command_status = Some(CommandStatus::Failed {
-                    command_type: command_type.clone(),
-                    target: target.clone(),
-                    error,
-                    failed_at: Instant::now(),
-                });
-            }
-        }
-    }

-    /// Check for command timeouts and automatically clear them
-    pub fn check_command_timeouts(&mut self) {
-        let now = Instant::now();
-        let mut hosts_to_clear = Vec::new();
-        
-        for (hostname, host_widgets) in &self.host_widgets {
-            if let Some(CommandStatus::InProgress { command_type, start_time, .. }) = &host_widgets.command_status {
-                let timeout_duration = match command_type {
-                    CommandType::SystemRebuild => Duration::from_secs(300), // 5 minutes for rebuilds
-                    _ => Duration::from_secs(30), // 30 seconds for service commands
-                };
-                
-                if now.duration_since(*start_time) > timeout_duration {
-                    hosts_to_clear.push(hostname.clone());
-                }
-            }
-            // Also clear success/failed status after display time
-            else if let Some(CommandStatus::Success { completed_at, .. }) = &host_widgets.command_status {
-                if now.duration_since(*completed_at) > Duration::from_secs(3) {
-                    hosts_to_clear.push(hostname.clone());
-                }
-            }
-            else if let Some(CommandStatus::Failed { failed_at, .. }) = &host_widgets.command_status {
-                if now.duration_since(*failed_at) > Duration::from_secs(5) {
-                    hosts_to_clear.push(hostname.clone());
-                }
-            }
-        }
-        
-        // Clear timed out commands
-        for hostname in hosts_to_clear {
-            if let Some(host_widgets) = self.host_widgets.get_mut(&hostname) {
-                host_widgets.command_status = None;
-            }
-        }
-    }

-    /// Check for rebuild completion by detecting agent hash changes
-    pub fn check_rebuild_completion(&mut self, metric_store: &MetricStore) {
-        let mut hosts_to_complete = Vec::new();
-        
-        for (hostname, host_widgets) in &self.host_widgets {
-            if let Some(CommandStatus::InProgress { command_type: CommandType::SystemRebuild, .. }) = &host_widgets.command_status {
-                // Check if agent hash has changed (indicating successful rebuild)
-                if let Some(agent_hash_metric) = metric_store.get_metric(hostname, "system_agent_hash") {
-                    if let cm_dashboard_shared::MetricValue::String(current_hash) = &agent_hash_metric.value {
-                        // Compare with stored hash (if we have one)
-                        if let Some(stored_hash) = host_widgets.system_widget.get_agent_hash() {
-                            if current_hash != stored_hash {
-                                // Agent hash changed - rebuild completed successfully
-                                hosts_to_complete.push(hostname.clone());
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        
-        // Mark rebuilds as completed
-        for hostname in hosts_to_complete {
-            self.complete_command(&hostname);
-        }
-    }

-    /// Scroll the focused panel up or down
-    pub fn scroll_focused_panel(&mut self, direction: i32) {
-        if let Some(hostname) = self.current_host.clone() {
-            let focused_panel = self.focused_panel; // Get the value before borrowing
-            let host_widgets = self.get_or_create_host_widgets(&hostname);
-            
-            match focused_panel {
-                PanelType::System => {
-                    if direction > 0 {
-                        host_widgets.system_scroll_offset = host_widgets.system_scroll_offset.saturating_add(1);
-                    } else {
-                        host_widgets.system_scroll_offset = host_widgets.system_scroll_offset.saturating_sub(1);
-                    }
-                    info!("System panel scroll offset: {}", host_widgets.system_scroll_offset);
-                }
-                PanelType::Services => {
-                    // For services panel, Up/Down moves selection cursor, not scroll
-                    let total_services = host_widgets.services_widget.get_total_services_count();
-                    
-                    if direction > 0 {
-                        host_widgets.services_widget.select_next(total_services);
-                        info!("Services selection moved down");
-                    } else {
-                        host_widgets.services_widget.select_previous();
-                        info!("Services selection moved up");
-                    }
-                }
-                PanelType::Backup => {
-                    if direction > 0 {
-                        host_widgets.backup_scroll_offset = host_widgets.backup_scroll_offset.saturating_add(1);
-                    } else {
-                        host_widgets.backup_scroll_offset = host_widgets.backup_scroll_offset.saturating_sub(1);
-                    }
-                    info!("Backup panel scroll offset: {}", host_widgets.backup_scroll_offset);
-                }
-            }
-        }
-    }
-
-    /// Get total count of services for bounds checking
-    fn get_total_services_count(&self, hostname: &str) -> usize {
-        if let Some(host_widgets) = self.host_widgets.get(hostname) {
-            host_widgets.services_widget.get_total_services_count()
-        } else {
-            0
-        }
-    }
-
-    /// Get list of currently visible panels
-    fn get_visible_panels(&self) -> Vec<PanelType> {
-        let mut visible_panels = vec![PanelType::System, PanelType::Services];
-        
-        // Check if backup panel should be shown
-        if let Some(hostname) = &self.current_host {
-            if let Some(host_widgets) = self.host_widgets.get(hostname) {
-                if host_widgets.backup_widget.has_data() {
-                    visible_panels.push(PanelType::Backup);
-                }
-            }
-        }
-        
-        visible_panels
-    }

    /// Render the dashboard (real btop-style multi-panel layout)
    pub fn render(&mut self, frame: &mut Frame, metric_store: &MetricStore) {
@@ -675,6 +531,21 @@ impl TuiApp {
            ])
            .split(main_chunks[1]); // main_chunks[1] is now the content area (between title and statusbar)

+        // Check if current host is offline (borrowing the name avoids cloning it
+        // on each render call)
+        let current_host_offline = match self.current_host.as_ref() {
+            Some(hostname) => self.calculate_host_status(hostname, metric_store) == Status::Offline,
+            None => true, // No host selected is considered offline
+        };
+
+        // If host is offline, render wake-up message instead of panels
+        if current_host_offline {
+            self.render_offline_host_message(frame, main_chunks[1]);
+            self.render_btop_title(frame, main_chunks[0], metric_store);
+            self.render_statusbar(frame, main_chunks[2]);
+            return;
+        }
+
        // Check if backup panel should be shown
        let show_backup = if let Some(hostname) = self.current_host.clone() {
            let host_widgets = self.get_or_create_host_widgets(&hostname);
@@ -712,19 +583,20 @@ impl TuiApp {

        // Render services widget for current host
        if let Some(hostname) = self.current_host.clone() {
-            let is_focused = self.focused_panel == PanelType::Services;
-            let (scroll_offset, command_status) = {
+            let is_focused = true; // Always show service selection
+            let scroll_offset = {
                let host_widgets = self.get_or_create_host_widgets(&hostname);
-                (host_widgets.services_scroll_offset, host_widgets.command_status.clone())
+                host_widgets.services_scroll_offset
            };
            let host_widgets = self.get_or_create_host_widgets(&hostname);
            host_widgets
                .services_widget
-                .render_with_command_status(frame, content_chunks[1], is_focused, scroll_offset, command_status.as_ref()); // Services takes full right side
+                .render(frame, content_chunks[1], is_focused, scroll_offset); // Services takes full right side
        }

        // Render statusbar at the bottom
        self.render_statusbar(frame, main_chunks[2]); // main_chunks[2] is the statusbar area
+
    }

    /// Render btop-style minimal title with host status colors
@@ -735,71 +607,90 @@ impl TuiApp {

        if self.available_hosts.is_empty() {
            let title_text = "cm-dashboard • no hosts discovered";
-            let title = Paragraph::new(title_text).style(Typography::title());
+            let title = Paragraph::new(title_text)
+                .style(Style::default().fg(Theme::background()).bg(Theme::status_color(Status::Unknown)));
            frame.render_widget(title, area);
            return;
        }

-        // Create spans for each host with status indicators
-        let mut spans = vec![Span::styled("cm-dashboard • ", Typography::title())];
+        // Compute each host's status once. The worst non-offline status picks the
+        // bar color; the per-host values are reused for the icons further down.
+        let host_statuses: Vec<Status> = self.available_hosts.iter()
+            .map(|host| self.calculate_host_status(host, metric_store))
+            .collect();
+        // Don't include offline hosts in status aggregation
+        let worst_status = host_statuses.iter().copied()
+            .filter(|status| *status != Status::Offline)
+            .fold(Status::Ok, |worst, status| Status::aggregate(&[worst, status]));

+        // Use the worst status color as background
+        let background_color = Theme::status_color(worst_status);
+
+        // Split the title bar into left and right sections
+        let chunks = Layout::default()
+            .direction(Direction::Horizontal)
+            .constraints([Constraint::Length(15), Constraint::Min(0)])
+            .split(area);
+
+        // Left side: "cm-dashboard" text
+        let left_span = Span::styled(
+            " cm-dashboard", 
+            Style::default().fg(Theme::background()).bg(background_color).add_modifier(Modifier::BOLD)
+        );
+        let left_title = Paragraph::new(Line::from(vec![left_span]))
+            .style(Style::default().bg(background_color));
+        frame.render_widget(left_title, chunks[0]);
+
+        // Right side: hosts with status indicators
+        let mut host_spans = Vec::new();
+        
        for (i, host) in self.available_hosts.iter().enumerate() {
            if i > 0 {
-                spans.push(Span::styled(" ", Typography::title()));
+                host_spans.push(Span::styled(
+                    " ", 
+                    Style::default().fg(Theme::background()).bg(background_color)
+                ));
            }

-            // Check if this host has a command status that affects the icon
-            let (status_icon, status_color) = if let Some(host_widgets) = self.host_widgets.get(host) {
-                match &host_widgets.command_status {
-                    Some(CommandStatus::InProgress { command_type: CommandType::SystemRebuild, .. }) => {
-                        // Show blue circular arrow during rebuild
-                        ("↻", Theme::highlight())
-                    }
-                    Some(CommandStatus::Success { command_type: CommandType::SystemRebuild, .. }) => {
-                        // Show green checkmark for successful rebuild
-                        ("✓", Theme::success())
-                    }
-                    Some(CommandStatus::Failed { command_type: CommandType::SystemRebuild, .. }) => {
-                        // Show red X for failed rebuild
-                        ("✗", Theme::error())
-                    }
-                    _ => {
-                        // Normal status icon based on metrics
-                        let host_status = self.calculate_host_status(host, metric_store);
-                        (StatusIcons::get_icon(host_status), Theme::status_color(host_status))
-                    }
-                }
-            } else {
-                // No host widgets yet, use normal status
-                let host_status = self.calculate_host_status(host, metric_store);
-                (StatusIcons::get_icon(host_status), Theme::status_color(host_status))
-            };
+            // Always show normal status icon based on metrics (no command status at host level)
+            let host_status = host_statuses[i];
+            let status_icon = StatusIcons::get_icon(host_status);

-            // Add status icon
-            spans.push(Span::styled(
+            // Add status icon with background color as foreground against status background
+            host_spans.push(Span::styled(
                format!("{} ", status_icon),
-                Style::default().fg(status_color),
+                Style::default().fg(Theme::background()).bg(background_color),
            ));

            if Some(host) == self.current_host.as_ref() {
-                // Selected host in bold bright white
-                spans.push(Span::styled(
+                // Selected host in bold background color against status background
+                host_spans.push(Span::styled(
                    host.clone(),
-                    Typography::title().add_modifier(Modifier::BOLD),
+                    Style::default()
+                        .fg(Theme::background())
+                        .bg(background_color)
+                        .add_modifier(Modifier::BOLD),
                ));
            } else {
-                // Other hosts in normal style with status color
-                spans.push(Span::styled(
+                // Other hosts in normal background color against status background
+                host_spans.push(Span::styled(
                    host.clone(),
-                    Style::default().fg(status_color),
+                    Style::default().fg(Theme::background()).bg(background_color),
                ));
            }
        }

-        let title_line = Line::from(spans);
-        let title = Paragraph::new(vec![title_line]);
+        // Add right padding
+        host_spans.push(Span::styled(
+            " ", 
+            Style::default().fg(Theme::background()).bg(background_color)
+        ));

-        frame.render_widget(title, area);
+        let host_line = Line::from(host_spans);
+        let host_title = Paragraph::new(vec![host_line])
+            .style(Style::default().bg(background_color))
+            .alignment(ratatui::layout::Alignment::Right);
+        frame.render_widget(host_title, chunks[1]);
    }

    /// Calculate overall status for a host based on its metrics
@@ -807,7 +698,7 @@ impl TuiApp {
        let metrics = metric_store.get_metrics_for_host(hostname);

        if metrics.is_empty() {
-            return Status::Unknown;
+            return Status::Offline;
        }

        // First check if we have the aggregated host status summary from the agent
@@ -827,7 +718,9 @@ impl TuiApp {
                Status::Warning => has_warning = true,
                Status::Pending => has_pending = true,
                Status::Ok => ok_count += 1,
-                Status::Unknown => {} // Ignore unknown for aggregation
+                Status::Inactive => ok_count += 1, // Treat inactive as OK for aggregation
+                Status::Unknown => {}, // Ignore unknown for aggregation
+                Status::Offline => {}, // Ignore offline for aggregation
            }
        }

@@ -862,39 +755,22 @@ impl TuiApp {
        let mut shortcuts = Vec::new();
        
        // Global shortcuts
-        shortcuts.push("Tab: Switch Host".to_string());
-        shortcuts.push("Shift+Tab: Switch Panel".to_string());
-        
-        // Scroll shortcuts (always available)
-        shortcuts.push("↑↓: Scroll".to_string());
-        
-        // Panel-specific shortcuts
-        match self.focused_panel {
-            PanelType::System => {
-                shortcuts.push("R: Rebuild".to_string());
-            }
-            PanelType::Services => {
-                shortcuts.push("S: Start".to_string());
-                shortcuts.push("Shift+S: Stop".to_string());
-                shortcuts.push("R: Restart".to_string());
-            }
-            PanelType::Backup => {
-                shortcuts.push("B: Trigger Backup".to_string());
-            }
-        }
+        shortcuts.push("Tab: Host".to_string());
+        shortcuts.push("↑↓/jk: Select".to_string());
+        shortcuts.push("r: Rebuild".to_string());
+        shortcuts.push("s/S: Start/Stop".to_string());
+        shortcuts.push("J: Logs".to_string());
+        shortcuts.push("L: Custom".to_string());
+        shortcuts.push("w: Wake".to_string());
        
        // Always show quit
-        shortcuts.push("Q: Quit".to_string());
+        shortcuts.push("q: Quit".to_string());
        
        shortcuts
    }

    fn render_system_panel(&mut self, frame: &mut Frame, area: Rect, _metric_store: &MetricStore) {
-        let system_block = if self.focused_panel == PanelType::System {
-            Components::focused_widget_block("system")
-        } else {
-            Components::widget_block("system")
-        };
+        let system_block = Components::widget_block("system");
        let inner_area = system_block.inner(area);
        frame.render_widget(system_block, area);
        // Get current host widgets, create if none exist
@@ -903,17 +779,15 @@ impl TuiApp {
                let host_widgets = self.get_or_create_host_widgets(&hostname);
                host_widgets.system_scroll_offset
            };
+            // Clone the config to avoid borrowing issues
+            let config = self.config.clone();
            let host_widgets = self.get_or_create_host_widgets(&hostname);
-            host_widgets.system_widget.render_with_scroll(frame, inner_area, scroll_offset);
+            host_widgets.system_widget.render_with_scroll(frame, inner_area, scroll_offset, &hostname, Some(&config));
        }
    }

    fn render_backup_panel(&mut self, frame: &mut Frame, area: Rect) {
-        let backup_block = if self.focused_panel == PanelType::Backup {
-            Components::focused_widget_block("backup")
-        } else {
-            Components::widget_block("backup")
-        };
+        let backup_block = Components::widget_block("backup");
        let inner_area = backup_block.inner(area);
        frame.render_widget(backup_block, area);

@@ -928,297 +802,100 @@ impl TuiApp {
        }
    }

-    fn render_storage_section(&self, frame: &mut Frame, area: Rect, metric_store: &MetricStore) {
-        if area.height < 2 {
-            return;
+    /// Render offline host message with wake-up option
+    ///
+    /// The wake hint is shown only when the config holds a MAC address for the host.
+    fn render_offline_host_message(&self, frame: &mut Frame, area: Rect) {
+        use ratatui::layout::Alignment;
+        use ratatui::style::Modifier;
+        use ratatui::text::{Line, Span};
+        use ratatui::widgets::{Block, Borders, Paragraph};
+
+        // Get hostname for message
+        let hostname = self.current_host.as_deref().unwrap_or("Unknown");
+
+        // Check if host has MAC address for wake-on-LAN
+        let has_mac = self.current_host.as_ref()
+            .and_then(|hostname| self.config.hosts.get(hostname))
+            .and_then(|details| details.mac_address.as_ref())
+            .is_some();
+
+        // Create message content
+        let mut lines = vec![
+            Line::from(Span::styled(
+                format!("Host '{}' is offline", hostname),
+                Style::default().fg(Theme::muted_text()).add_modifier(Modifier::BOLD),
+            )),
+            Line::from(""),
+        ];
+
+        if has_mac {
+            lines.push(Line::from(Span::styled(
+                "Press 'w' to wake up host",
+                Style::default().fg(Theme::primary_text()).add_modifier(Modifier::BOLD),
+            )));
+        } else {
+            lines.push(Line::from(Span::styled(
+                "No MAC address configured - cannot wake up",
+                Style::default().fg(Theme::muted_text()),
+            )));
        }

-        if let Some(ref hostname) = self.current_host {
-            // Discover storage pools from metrics (look for disk_{pool}_usage_percent patterns)
-            let mut storage_pools: std::collections::HashMap<String, Vec<String>> = 
-                std::collections::HashMap::new();
-            
-            let all_metrics = metric_store.get_metrics_for_host(hostname);
-            
-            // Find storage pools by looking for usage metrics
-            for metric in &all_metrics {
-                if metric.name.starts_with("disk_") && metric.name.ends_with("_usage_percent") {
-                    let pool_name = metric.name
-                        .strip_prefix("disk_")
-                        .and_then(|s| s.strip_suffix("_usage_percent"))
-                        .unwrap_or_default()
-                        .to_string();
-                    
-                    if !pool_name.is_empty() && pool_name != "tmp" {
-                        storage_pools.entry(pool_name.clone()).or_insert_with(Vec::new);
-                    }
-                }
-            }
-            
-            // Find individual drives for each pool
-            for metric in &all_metrics {
-                if metric.name.starts_with("disk_") && metric.name.contains("_") && metric.name.ends_with("_health") {
-                    // Parse disk_{pool}_{drive}_health format
-                    let parts: Vec<&str> = metric.name.split('_').collect();
-                    if parts.len() >= 4 && parts[0] == "disk" && parts[parts.len()-1] == "health" {
-                        // Extract pool name (everything between "disk_" and "_{drive}_health")
-                        let drive_name = parts[parts.len()-2].to_string();
-                        let pool_part_end = parts.len() - 2;
-                        let pool_name = parts[1..pool_part_end].join("_");
-                        
-                        if let Some(drives) = storage_pools.get_mut(&pool_name) {
-                            if !drives.contains(&drive_name) {
-                                drives.push(drive_name);
-                            }
-                        }
-                    }
-                }
-            }
+        // Create centered message
+        let message = Paragraph::new(lines)
+            .block(Block::default()
+                .borders(Borders::ALL)
+                .border_style(Style::default().fg(Theme::muted_text()))
+                .title(" Offline Host ")
+                .title_style(Style::default().fg(Theme::muted_text()).add_modifier(Modifier::BOLD)))
+            .style(Style::default().bg(Theme::background()).fg(Theme::primary_text()))
+            .alignment(Alignment::Center);

-            // Check if we found any storage pools
-            if storage_pools.is_empty() {
-                // No storage pools found - show error/waiting message
-                let content_chunks = ratatui::layout::Layout::default()
-                    .direction(Direction::Vertical)
-                    .constraints([Constraint::Length(1), Constraint::Min(0)])
-                    .split(area);
+        // Center the message in the available area
+        let popup_area = ratatui::layout::Layout::default()
+            .direction(Direction::Vertical)
+            .constraints([
+                Constraint::Percentage(40),
+                Constraint::Length(6),
+                Constraint::Percentage(40),
+            ])
+            .split(area)[1];

-                let storage_title = Paragraph::new("Storage:").style(Typography::widget_title());
-                frame.render_widget(storage_title, content_chunks[0]);
+        let popup_area = ratatui::layout::Layout::default()
+            .direction(Direction::Horizontal)
+            .constraints([
+                Constraint::Percentage(25),
+                Constraint::Percentage(50),
+                Constraint::Percentage(25),
+            ])
+            .split(popup_area)[1];

-                let no_storage_spans =
-                    StatusIcons::create_status_spans(Status::Unknown, "No storage pools detected");
-                let no_storage_para = Paragraph::new(ratatui::text::Line::from(no_storage_spans));
-                frame.render_widget(no_storage_para, content_chunks[1]);
-                return;
-            }
+        frame.render_widget(message, popup_area);
+    }

-            let available_lines = area.height as usize;
-            let mut constraints = Vec::new();
-            let mut pools_to_show = Vec::new();
-            let mut current_line = 0;
-
-            // Sort storage pools by name for consistent ordering
-            let mut sorted_pools: Vec<_> = storage_pools.iter().collect();
-            sorted_pools.sort_by_key(|(pool_name, _)| pool_name.as_str());
-            
-            // Add section title if we have pools
-            let mut title_added = false;
-            
-            for (pool_name, drives) in sorted_pools {
-                // Calculate lines needed: pool header + drives + usage line (+ section title if first)
-                let section_title_lines = if !title_added { 1 } else { 0 };
-                let lines_for_this_pool = section_title_lines + 1 + drives.len() + 1;
-                
-                if current_line + lines_for_this_pool <= available_lines {
-                    pools_to_show.push((pool_name.clone(), drives.clone()));
-
-                    // Add section title constraint if this is the first pool
-                    if !title_added {
-                        constraints.push(Constraint::Length(1)); // "Storage:" section title
-                        title_added = true;
-                    }
-
-                    // Add constraints for this pool
-                    constraints.push(Constraint::Length(1)); // Pool header with status
-                    for _ in 0..drives.len() {
-                        constraints.push(Constraint::Length(1)); // Drive line with tree symbol
-                    }
-                    constraints.push(Constraint::Length(1)); // Usage line with end tree symbol
-
-                    current_line += lines_for_this_pool;
-                } else {
-                    break; // Can't fit more pools
-                }
-            }
-
-            // Add remaining space if any
-            if constraints.len() < available_lines {
-                constraints.push(Constraint::Min(0));
-            }
-
-            let content_chunks = ratatui::layout::Layout::default()
-                .direction(Direction::Vertical)
-                .constraints(constraints)
-                .split(area);
-
-            let mut chunk_index = 0;
-
-            // Render "Storage:" section title if we have pools
-            if !pools_to_show.is_empty() {
-                let storage_title = Paragraph::new("Storage:").style(Typography::widget_title());
-                frame.render_widget(storage_title, content_chunks[chunk_index]);
-                chunk_index += 1;
-            }
-
-            // Display each storage pool with tree structure
-            for (pool_name, drives) in &pools_to_show {
-                // Pool header with status icon and type
-                let pool_display_name = if pool_name == "root" {
-                    "root".to_string()
-                } else {
-                    pool_name.clone()
-                };
-                
-                let pool_type = if drives.len() > 1 { "multi-drive" } else { "Single" };
-                
-                // Get pool status from usage metric
-                let pool_status = metric_store
-                    .get_metric(hostname, &format!("disk_{}_usage_percent", pool_name))
-                    .map(|m| m.status)
-                    .unwrap_or(Status::Unknown);
-                
-                // Create pool header with status icon
-                let pool_status_icon = StatusIcons::get_icon(pool_status);
-                let pool_status_color = Theme::status_color(pool_status);
-                let pool_header_text = format!("{} ({}):", pool_display_name, pool_type);
-                
-                let pool_header_spans = vec![
-                    ratatui::text::Span::styled(
-                        format!("{} ", pool_status_icon),
-                        Style::default().fg(pool_status_color),
-                    ),
-                    ratatui::text::Span::styled(
-                        pool_header_text,
-                        Style::default().fg(Theme::primary_text()),
-                    ),
-                ];
-                let pool_header_para = Paragraph::new(ratatui::text::Line::from(pool_header_spans));
-                frame.render_widget(pool_header_para, content_chunks[chunk_index]);
-                chunk_index += 1;
-
-                // Individual drive lines with tree symbols
-                let mut sorted_drives = drives.clone();
-                sorted_drives.sort();
-                for (_drive_idx, drive_name) in sorted_drives.iter().enumerate() {
-                    // Get drive health status
-                    let drive_health_metric = metric_store
-                        .get_metric(hostname, &format!("disk_{}_{}_health", pool_name, drive_name));
-                    let drive_status = drive_health_metric
-                        .map(|m| m.status)
-                        .unwrap_or(Status::Unknown);
-                    
-                    // Get drive temperature
-                    let temp_text = metric_store
-                        .get_metric(hostname, &format!("disk_{}_{}_temperature", pool_name, drive_name))
-                        .and_then(|m| m.value.as_f32())
-                        .map(|temp| format!(" T:{:.0}°C", temp))
-                        .unwrap_or_default();
-
-                    // Get drive wear level (SSDs)
-                    let wear_text = metric_store
-                        .get_metric(hostname, &format!("disk_{}_{}_wear_percent", pool_name, drive_name))
-                        .and_then(|m| m.value.as_f32())
-                        .map(|wear| format!(" W:{:.0}%", wear))
-                        .unwrap_or_default();
-
-                    // Build drive line with tree symbol
-                    let tree_symbol = "├─";
-                    let drive_status_icon = StatusIcons::get_icon(drive_status);
-                    let drive_status_color = Theme::status_color(drive_status);
-                    let drive_text = format!("{}{}{}", drive_name, temp_text, wear_text);
-                    
-                    let drive_spans = vec![
-                        ratatui::text::Span::styled("  ", Style::default()), // 2-space indentation
-                        ratatui::text::Span::styled(
-                            format!("{} ", tree_symbol),
-                            Style::default().fg(Theme::muted_text()),
-                        ),
-                        ratatui::text::Span::styled(
-                            format!("{} ", drive_status_icon),
-                            Style::default().fg(drive_status_color),
-                        ),
-                        ratatui::text::Span::styled(
-                            drive_text,
-                            Style::default().fg(Theme::primary_text()),
-                        ),
-                    ];
-                    let drive_para = Paragraph::new(ratatui::text::Line::from(drive_spans));
-                    frame.render_widget(drive_para, content_chunks[chunk_index]);
-                    chunk_index += 1;
-                }
-
-                // Usage line with end tree symbol and status icon
-                let usage_percent = metric_store
-                    .get_metric(hostname, &format!("disk_{}_usage_percent", pool_name))
-                    .and_then(|m| m.value.as_f32())
-                    .unwrap_or(0.0);
-
-                let used_gb = metric_store
-                    .get_metric(hostname, &format!("disk_{}_used_gb", pool_name))
-                    .and_then(|m| m.value.as_f32())
-                    .unwrap_or(0.0);
-
-                let total_gb = metric_store
-                    .get_metric(hostname, &format!("disk_{}_total_gb", pool_name))
-                    .and_then(|m| m.value.as_f32())
-                    .unwrap_or(0.0);
-
-                let usage_status = metric_store
-                    .get_metric(hostname, &format!("disk_{}_usage_percent", pool_name))
-                    .map(|m| m.status)
-                    .unwrap_or(Status::Unknown);
-
-                // Format usage with proper units
-                let (used_display, total_display, unit) = if total_gb < 1.0 {
-                    (used_gb * 1024.0, total_gb * 1024.0, "MB")
-                } else {
-                    (used_gb, total_gb, "GB")
-                };
-
-                let end_tree_symbol = "└─";
-                let usage_status_icon = StatusIcons::get_icon(usage_status);
-                let usage_status_color = Theme::status_color(usage_status);
-                let usage_text = format!("{:.1}% {:.1}{}/{:.1}{}", 
-                    usage_percent, used_display, unit, total_display, unit);
-                
-                let usage_spans = vec![
-                    ratatui::text::Span::styled("  ", Style::default()), // 2-space indentation
-                    ratatui::text::Span::styled(
-                        format!("{} ", end_tree_symbol),
-                        Style::default().fg(Theme::muted_text()),
-                    ),
-                    ratatui::text::Span::styled(
-                        format!("{} ", usage_status_icon),
-                        Style::default().fg(usage_status_color),
-                    ),
-                    ratatui::text::Span::styled(
-                        usage_text,
-                        Style::default().fg(Theme::primary_text()),
-                    ),
-                ];
-                let usage_para = Paragraph::new(ratatui::text::Line::from(usage_spans));
-                frame.render_widget(usage_para, content_chunks[chunk_index]);
-                chunk_index += 1;
-            }
-
-            // Show truncation indicator if we couldn't display all pools
-            if pools_to_show.len() < storage_pools.len() {
-                if let Some(last_chunk) = content_chunks.last() {
-                    let truncated_count = storage_pools.len() - pools_to_show.len();
-                    let truncated_text = format!(
-                        "... and {} more pool{}",
-                        truncated_count,
-                        if truncated_count == 1 { "" } else { "s" }
-                    );
-                    let truncated_para = Paragraph::new(truncated_text).style(Typography::muted());
-                    frame.render_widget(truncated_para, *last_chunk);
-                }
-            }
+    /// Get the connection IP for `hostname` from the host configuration (falls back to the hostname itself)
+    fn get_connection_ip(&self, hostname: &str) -> String {
+        if let Some(host_details) = self.config.hosts.get(hostname) {
+            host_details.get_connection_ip(hostname)
        } else {
-            // No host connected
-            let content_chunks = ratatui::layout::Layout::default()
-                .direction(Direction::Vertical)
-                .constraints([Constraint::Length(1), Constraint::Min(0)])
-                .split(area);
-
-            let storage_title = Paragraph::new("Storage:").style(Typography::widget_title());
-            frame.render_widget(storage_title, content_chunks[0]);
-
-            let no_host_spans =
-                StatusIcons::create_status_spans(Status::Unknown, "No host connected");
-            let no_host_para = Paragraph::new(ratatui::text::Line::from(no_host_spans));
-            frame.render_widget(no_host_para, content_chunks[1]);
+            hostname.to_string() // no config entry for this host: use the hostname as the address
        }
    }
+
+    /// Parse a colon-separated MAC string (e.g., "AA:BB:CC:DD:EE:FF") into [u8; 6]; Err on wrong part count or bad octet
+    fn parse_mac_address(mac_str: &str) -> Result<[u8; 6], &'static str> {
+        let parts: Vec<&str> = mac_str.split(':').collect();
+        if parts.len() != 6 {
+            return Err("MAC address must have 6 parts separated by colons");
+        }
+
+        let mut mac = [0u8; 6];
+        for (i, part) in parts.iter().enumerate() {
+            match u8::from_str_radix(part, 16) { // on its own this also accepts "+A" and "0AA", hence the guard
+                Ok(byte) if part.len() <= 2 && !part.starts_with('+') => mac[i] = byte,
+                _ => return Err("Invalid hexadecimal byte in MAC address"),
+            }
+        }
+        Ok(mac)
+    }
 }
--- a/dashboard/src/ui/theme.rs
+++ b/dashboard/src/ui/theme.rs
@@ -143,10 +143,12 @@ impl Theme {
    pub fn status_color(status: Status) -> Color {
        match status {
            Status::Ok => Self::success(),
+            Status::Inactive => Self::muted_text(), // Gray for inactive services
            Status::Pending => Self::highlight(), // Blue for pending
            Status::Warning => Self::warning(),
            Status::Critical => Self::error(),
            Status::Unknown => Self::muted_text(),
+            Status::Offline => Self::muted_text(), // Gray for offline (same muted color as Inactive/Unknown)
        }
    }

@@ -226,10 +228,6 @@ impl Layout {
    /// System vs backup split (equal)
    pub const SYSTEM_PANEL_HEIGHT: u16 = 50;
    pub const BACKUP_PANEL_HEIGHT: u16 = 50;
-    /// System panel CPU section height
-    pub const CPU_SECTION_HEIGHT: u16 = 2;
-    /// System panel memory section height  
-    pub const MEMORY_SECTION_HEIGHT: u16 = 3;
 }

 /// Typography system
@@ -246,10 +244,12 @@ impl StatusIcons {
    pub fn get_icon(status: Status) -> &'static str {
        match status {
            Status::Ok => "●",
+            Status::Inactive => "○", // Empty circle for inactive services
            Status::Pending => "◉", // Hollow circle for pending
            Status::Warning => "◐",
-            Status::Critical => "◯",
+            Status::Critical => "!",
            Status::Unknown => "?",
+            Status::Offline => "○", // Empty circle for offline (same glyph as Inactive)
        }
    }

@@ -258,10 +258,12 @@ impl StatusIcons {
        let icon = Self::get_icon(status);
        let status_color = match status {
            Status::Ok => Theme::success(),         // Green
+            Status::Inactive => Theme::muted_text(), // Gray for inactive services
            Status::Pending => Theme::highlight(),       // Blue
            Status::Warning => Theme::warning(),    // Yellow
            Status::Critical => Theme::error(),     // Red
            Status::Unknown => Theme::muted_text(), // Gray
+            Status::Offline => Theme::muted_text(), // Gray for offline (same muted color as Unknown)
        };

        vec![
@@ -293,27 +295,9 @@ impl Components {
            )
    }

-    /// Widget block with focus indicator (blue border)
-    pub fn focused_widget_block(title: &str) -> Block<'_> {
-        Block::default()
-            .title(title)
-            .borders(Borders::ALL)
-            .style(Style::default().fg(Theme::highlight()).bg(Theme::background())) // Blue border for focus
-            .title_style(
-                Style::default()
-                    .fg(Theme::highlight()) // Blue title for focus
-                    .bg(Theme::background()),
-            )
-    }
 }

 impl Typography {
-    /// Main title style (dashboard header)
-    pub fn title() -> Style {
-        Style::default()
-            .fg(Theme::primary_text())
-            .bg(Theme::background())
-    }

    /// Widget title style (panel headers) - bold bright white
    pub fn widget_title() -> Style {
--- a/dashboard/src/ui/widgets/backup.rs
+++ b/dashboard/src/ui/widgets/backup.rs
@@ -81,38 +81,7 @@ impl BackupWidget {



-    /// Format timestamp for display
-    fn format_last_run(&self) -> String {
-        match self.last_run_timestamp {
-            Some(timestamp) => {
-                let duration = chrono::Utc::now().timestamp() - timestamp;
-                if duration < 3600 {
-                    format!("{}m ago", duration / 60)
-                } else if duration < 86400 {
-                    format!("{}h ago", duration / 3600)
-                } else {
-                    format!("{}d ago", duration / 86400)
-                }
-            }
-            None => "—".to_string(),
-        }
-    }

-    /// Format disk usage in format "usedGB/totalGB"
-    fn format_repo_size(&self) -> String {
-        match (self.backup_disk_used_gb, self.backup_disk_total_gb) {
-            (Some(used_gb), Some(total_gb)) => {
-                let used_str = Self::format_size_with_proper_units(used_gb);
-                let total_str = Self::format_size_with_proper_units(total_gb);
-                format!("{}/{}", used_str, total_str)
-            }
-            (Some(used_gb), None) => {
-                // Fallback to just used size if total not available
-                Self::format_size_with_proper_units(used_gb)
-            }
-            _ => "—".to_string(),
-        }
-    }

    /// Format size with proper units (xxxkB/MB/GB/TB)
    fn format_size_with_proper_units(size_gb: f32) -> String {
@@ -137,23 +106,7 @@ impl BackupWidget {
        }
    }

-    /// Format product name display
-    fn format_product_name(&self) -> String {
-        if let Some(ref product_name) = self.backup_disk_product_name {
-            format!("P/N: {}", product_name)
-        } else {
-            "P/N: Unknown".to_string()
-        }
-    }

-    /// Format serial number display
-    fn format_serial_number(&self) -> String {
-        if let Some(ref serial) = self.backup_disk_serial_number {
-            format!("S/N: {}", serial)
-        } else {
-            "S/N: Unknown".to_string()
-        }
-    }

    /// Extract service name from metric name (e.g., "backup_service_gitea_status" -> "gitea")
    fn extract_service_name(metric_name: &str) -> Option<String> {
@@ -306,7 +259,12 @@ impl Widget for BackupWidget {
        services.sort_by(|a, b| a.name.cmp(&b.name));
        self.service_metrics = services;

-        self.has_data = !metrics.is_empty();
+        // Only show backup panel if we have meaningful backup data
+        self.has_data = !metrics.is_empty() && (
+            self.last_run_timestamp.is_some() ||
+            self.total_repo_size_gb.is_some() ||
+            !self.service_metrics.is_empty()
+        );

        debug!(
            "Backup widget updated: status={:?}, services={}, total_size={:?}GB",
@@ -324,9 +282,6 @@ impl Widget for BackupWidget {
        }
    }

-    fn render(&mut self, frame: &mut Frame, area: Rect) {
-        self.render_with_scroll(frame, area, 0);
-    }
 }

 impl BackupWidget {
--- a/dashboard/src/ui/widgets/cpu.rs
+++ b/dashboard/src/ui/widgets/cpu.rs
@@ -1,139 +1 @@
-use cm_dashboard_shared::{Metric, Status};
-use ratatui::{
-    layout::{Constraint, Direction, Layout, Rect},
-    widgets::Paragraph,
-    Frame,
-};
-use tracing::debug;
-
-use super::Widget;
-use crate::ui::theme::{StatusIcons, Typography};
-
-/// CPU widget displaying load, temperature, and frequency
-#[derive(Clone)]
-pub struct CpuWidget {
-    /// CPU load averages (1, 5, 15 minutes)
-    load_1min: Option<f32>,
-    load_5min: Option<f32>,
-    load_15min: Option<f32>,
-    /// CPU temperature in Celsius
-    temperature: Option<f32>,
-    /// CPU frequency in MHz
-    frequency: Option<f32>,
-    /// Aggregated status
-    status: Status,
-    /// Last update indicator
-    has_data: bool,
-}
-
-impl CpuWidget {
-    pub fn new() -> Self {
-        Self {
-            load_1min: None,
-            load_5min: None,
-            load_15min: None,
-            temperature: None,
-            frequency: None,
-            status: Status::Unknown,
-            has_data: false,
-        }
-    }
-
-    /// Format load average for display
-    fn format_load(&self) -> String {
-        match (self.load_1min, self.load_5min, self.load_15min) {
-            (Some(l1), Some(l5), Some(l15)) => {
-                format!("{:.2} {:.2} {:.2}", l1, l5, l15)
-            }
-            _ => "— — —".to_string(),
-        }
-    }
-
-    /// Format frequency for display
-    fn format_frequency(&self) -> String {
-        match self.frequency {
-            Some(freq) => format!("{:.1} MHz", freq),
-            None => "— MHz".to_string(),
-        }
-    }
-}
-
-impl Widget for CpuWidget {
-    fn update_from_metrics(&mut self, metrics: &[&Metric]) {
-        debug!("CPU widget updating with {} metrics", metrics.len());
-
-        // Reset status aggregation
-        let mut statuses = Vec::new();
-
-        for metric in metrics {
-            match metric.name.as_str() {
-                "cpu_load_1min" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.load_1min = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                "cpu_load_5min" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.load_5min = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                "cpu_load_15min" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.load_15min = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                "cpu_temperature_celsius" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.temperature = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                "cpu_frequency_mhz" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.frequency = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                _ => {}
-            }
-        }
-
-        // Aggregate status
-        self.status = if statuses.is_empty() {
-            Status::Unknown
-        } else {
-            Status::aggregate(&statuses)
-        };
-
-        self.has_data = !metrics.is_empty();
-
-        debug!(
-            "CPU widget updated: load={:?}, temp={:?}, freq={:?}, status={:?}",
-            self.load_1min, self.temperature, self.frequency, self.status
-        );
-    }
-
-    fn render(&mut self, frame: &mut Frame, area: Rect) {
-        let content_chunks = Layout::default()
-            .direction(Direction::Vertical)
-            .constraints([Constraint::Length(1), Constraint::Length(1)])
-            .split(area);
-        let cpu_title = Paragraph::new("CPU:").style(Typography::widget_title());
-        frame.render_widget(cpu_title, content_chunks[0]);
-        let load_freq_spans = StatusIcons::create_status_spans(
-            self.status,
-            &format!("Load: {} • {}", self.format_load(), self.format_frequency()),
-        );
-        let load_freq_para = Paragraph::new(ratatui::text::Line::from(load_freq_spans));
-        frame.render_widget(load_freq_para, content_chunks[1]);
-    }
-}
-
-impl Default for CpuWidget {
-    fn default() -> Self {
-        Self::new()
-    }
-}
+// This file is intentionally left minimal - CPU functionality is handled by the SystemWidget
--- a/dashboard/src/ui/widgets/memory.rs
+++ b/dashboard/src/ui/widgets/memory.rs
@@ -1,253 +1 @@
-use cm_dashboard_shared::{Metric, Status};
-use ratatui::{
-    layout::{Constraint, Direction, Layout, Rect},
-    widgets::Paragraph,
-    Frame,
-};
-use tracing::debug;
-
-use super::Widget;
-use crate::ui::theme::{StatusIcons, Typography};
-
-/// Memory widget displaying usage, totals, and swap information
-#[derive(Clone)]
-pub struct MemoryWidget {
-    /// Memory usage percentage
-    usage_percent: Option<f32>,
-    /// Total memory in GB
-    total_gb: Option<f32>,
-    /// Used memory in GB
-    used_gb: Option<f32>,
-    /// Available memory in GB
-    available_gb: Option<f32>,
-    /// Total swap in GB
-    swap_total_gb: Option<f32>,
-    /// Used swap in GB
-    swap_used_gb: Option<f32>,
-    /// /tmp directory size in MB
-    tmp_size_mb: Option<f32>,
-    /// /tmp total size in MB
-    tmp_total_mb: Option<f32>,
-    /// /tmp usage percentage
-    tmp_usage_percent: Option<f32>,
-    /// Aggregated status
-    status: Status,
-    /// Last update indicator
-    has_data: bool,
-}
-
-impl MemoryWidget {
-    pub fn new() -> Self {
-        Self {
-            usage_percent: None,
-            total_gb: None,
-            used_gb: None,
-            available_gb: None,
-            swap_total_gb: None,
-            swap_used_gb: None,
-            tmp_size_mb: None,
-            tmp_total_mb: None,
-            tmp_usage_percent: None,
-            status: Status::Unknown,
-            has_data: false,
-        }
-    }
-
-    /// Get memory usage percentage for gauge
-    fn get_memory_percentage(&self) -> u16 {
-        match self.usage_percent {
-            Some(percent) => percent.min(100.0).max(0.0) as u16,
-            None => {
-                // Calculate from used/total if percentage not available
-                match (self.used_gb, self.total_gb) {
-                    (Some(used), Some(total)) if total > 0.0 => {
-                        let percent = (used / total * 100.0).min(100.0).max(0.0);
-                        percent as u16
-                    }
-                    _ => 0,
-                }
-            }
-        }
-    }
-
-    /// Format size with proper units (kB/MB/GB)
-    fn format_size_units(size_mb: f32) -> String {
-        if size_mb >= 1024.0 {
-            // Convert to GB
-            let size_gb = size_mb / 1024.0;
-            format!("{:.1}GB", size_gb)
-        } else if size_mb >= 1.0 {
-            // Show as MB
-            format!("{:.0}MB", size_mb)
-        } else if size_mb >= 0.001 {
-            // Convert to kB
-            let size_kb = size_mb * 1024.0;
-            format!("{:.0}kB", size_kb)
-        } else {
-            // Show very small sizes in bytes
-            let size_bytes = size_mb * 1024.0 * 1024.0;
-            format!("{:.0}B", size_bytes)
-        }
-    }
-
-    /// Format /tmp usage as "xx% yyykB/MB/GB/zzzGB"
-    fn format_tmp_usage(&self) -> String {
-        match (self.tmp_usage_percent, self.tmp_size_mb, self.tmp_total_mb) {
-            (Some(percent), Some(used_mb), Some(total_mb)) => {
-                let used_str = Self::format_size_units(used_mb);
-                let total_str = Self::format_size_units(total_mb);
-                format!("{:.1}% {}/{}", percent, used_str, total_str)
-            }
-            (Some(percent), Some(used_mb), None) => {
-                let used_str = Self::format_size_units(used_mb);
-                format!("{:.1}% {}", percent, used_str)
-            }
-            (None, Some(used_mb), Some(total_mb)) => {
-                let used_str = Self::format_size_units(used_mb);
-                let total_str = Self::format_size_units(total_mb);
-                format!("{}/{}", used_str, total_str)
-            }
-            (None, Some(used_mb), None) => Self::format_size_units(used_mb),
-            _ => "—".to_string(),
-        }
-    }
-
-    /// Get tmp status based on usage percentage
-    fn get_tmp_status(&self) -> Status {
-        if let Some(tmp_percent) = self.tmp_usage_percent {
-            if tmp_percent >= 90.0 {
-                Status::Critical
-            } else if tmp_percent >= 70.0 {
-                Status::Warning
-            } else {
-                Status::Ok
-            }
-        } else {
-            Status::Unknown
-        }
-    }
-}
-
-impl Widget for MemoryWidget {
-    fn update_from_metrics(&mut self, metrics: &[&Metric]) {
-        debug!("Memory widget updating with {} metrics", metrics.len());
-
-        // Reset status aggregation
-        let mut statuses = Vec::new();
-
-        for metric in metrics {
-            match metric.name.as_str() {
-                "memory_usage_percent" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.usage_percent = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                "memory_total_gb" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.total_gb = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                "memory_used_gb" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.used_gb = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                "memory_available_gb" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.available_gb = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                "memory_swap_total_gb" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.swap_total_gb = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                "memory_swap_used_gb" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.swap_used_gb = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                "disk_tmp_size_mb" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.tmp_size_mb = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                "disk_tmp_total_mb" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.tmp_total_mb = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                "disk_tmp_usage_percent" => {
-                    if let Some(value) = metric.value.as_f32() {
-                        self.tmp_usage_percent = Some(value);
-                        statuses.push(metric.status);
-                    }
-                }
-                _ => {}
-            }
-        }
-
-        // Aggregate status
-        self.status = if statuses.is_empty() {
-            Status::Unknown
-        } else {
-            Status::aggregate(&statuses)
-        };
-
-        self.has_data = !metrics.is_empty();
-
-        debug!("Memory widget updated: usage={:?}%, total={:?}GB, swap_total={:?}GB, tmp={:?}/{:?}MB, status={:?}",
-               self.usage_percent, self.total_gb, self.swap_total_gb, self.tmp_size_mb, self.tmp_total_mb, self.status);
-    }
-
-    fn render(&mut self, frame: &mut Frame, area: Rect) {
-        let content_chunks = Layout::default()
-            .direction(Direction::Vertical)
-            .constraints([
-                Constraint::Length(1),
-                Constraint::Length(1),
-                Constraint::Length(1),
-            ])
-            .split(area);
-        let mem_title = Paragraph::new("RAM:").style(Typography::widget_title());
-        frame.render_widget(mem_title, content_chunks[0]);
-
-        // Format used and total memory with smart units, percentage, and status icon
-        let used_str = self
-            .used_gb
-            .map_or("—".to_string(), |v| Self::format_size_units(v * 1024.0)); // Convert GB to MB for formatting
-        let total_str = self
-            .total_gb
-            .map_or("—".to_string(), |v| Self::format_size_units(v * 1024.0)); // Convert GB to MB for formatting
-        let percentage = self.get_memory_percentage();
-        let mem_details_spans = StatusIcons::create_status_spans(
-            self.status,
-            &format!("Used: {}% {}/{}", percentage, used_str, total_str),
-        );
-        let mem_details_para = Paragraph::new(ratatui::text::Line::from(mem_details_spans));
-        frame.render_widget(mem_details_para, content_chunks[1]);
-
-        // /tmp usage line with status icon
-        let tmp_status = self.get_tmp_status();
-        let tmp_spans = StatusIcons::create_status_spans(
-            tmp_status,
-            &format!("tmp: {}", self.format_tmp_usage()),
-        );
-        let tmp_para = Paragraph::new(ratatui::text::Line::from(tmp_spans));
-        frame.render_widget(tmp_para, content_chunks[2]);
-    }
-}
-
-impl Default for MemoryWidget {
-    fn default() -> Self {
-        Self::new()
-    }
-}
+// This file is intentionally left minimal - Memory functionality is handled by the SystemWidget
--- a/dashboard/src/ui/widgets/mod.rs
+++ b/dashboard/src/ui/widgets/mod.rs
@@ -1,5 +1,4 @@
 use cm_dashboard_shared::Metric;
-use ratatui::{layout::Rect, Frame};

 pub mod backup;
 pub mod cpu;
@@ -16,6 +15,4 @@ pub trait Widget {
    /// Update widget with new metrics data
    fn update_from_metrics(&mut self, metrics: &[&Metric]);

-    /// Render the widget to a terminal frame
-    fn render(&mut self, frame: &mut Frame, area: Rect);
 }
--- a/dashboard/src/ui/widgets/services.rs
+++ b/dashboard/src/ui/widgets/services.rs
@@ -9,7 +9,6 @@ use tracing::debug;

 use super::Widget;
 use crate::ui::theme::{Components, StatusIcons, Theme, Typography};
-use crate::ui::{CommandStatus, CommandType};
 use ratatui::style::Style;

 /// Services widget displaying hierarchical systemd service statuses
@@ -113,13 +112,10 @@ impl ServicesWidget {
            name.to_string()
        };

-        // Parent services always show active/inactive status
+        // Parent services always show actual systemctl status
        let status_str = match info.widget_status {
-            Status::Ok => "active".to_string(),
            Status::Pending => "pending".to_string(),
-            Status::Warning => "inactive".to_string(),
-            Status::Critical => "failed".to_string(),
-            Status::Unknown => "unknown".to_string(),
+            _ => info.status.clone(), // Use actual status from agent (active/inactive/failed)
        };

        format!(
@@ -128,40 +124,7 @@ impl ServicesWidget {
        )
    }

-    /// Get status icon for service, considering command status for visual feedback
-    fn get_service_icon_and_status(&self, service_name: &str, info: &ServiceInfo, command_status: Option<&CommandStatus>) -> (String, String, ratatui::prelude::Color) {
-        // Check if this service is currently being operated on
-        if let Some(status) = command_status {
-            match status {
-                CommandStatus::InProgress { command_type, target, .. } => {
-                    if target == service_name {
-                        // Only show special icons for service commands
-                        if let Some((icon, status_text)) = match command_type {
-                            CommandType::ServiceRestart => Some(("↻", "restarting")),
-                            CommandType::ServiceStart => Some(("↑", "starting")), 
-                            CommandType::ServiceStop => Some(("↓", "stopping")),
-                            _ => None, // Don't handle non-service commands here
-                        } {
-                            return (icon.to_string(), status_text.to_string(), Theme::highlight());
-                        }
-                    }
-                }
-                _ => {} // Success/Failed states will show normal status
-            }
-        }
-        
-        // Normal status display
-        let icon = StatusIcons::get_icon(info.widget_status);
-        let status_color = match info.widget_status {
-            Status::Ok => Theme::success(),
-            Status::Pending => Theme::highlight(),
-            Status::Warning => Theme::warning(),
-            Status::Critical => Theme::error(),
-            Status::Unknown => Theme::muted_text(),
-        };
-        
-        (icon.to_string(), info.status.clone(), status_color)
-    }
+

    /// Create spans for sub-service with icon next to name
    fn create_sub_service_spans(
@@ -169,17 +132,6 @@ impl ServicesWidget {
        name: &str,
        info: &ServiceInfo,
        is_last: bool,
-    ) -> Vec<ratatui::text::Span<'static>> {
-        self.create_sub_service_spans_with_status(name, info, is_last, None)
-    }
-
-    /// Create spans for sub-service with icon next to name, considering command status
-    fn create_sub_service_spans_with_status(
-        &self,
-        name: &str,
-        info: &ServiceInfo,
-        is_last: bool,
-        command_status: Option<&CommandStatus>,
    ) -> Vec<ratatui::text::Span<'static>> {
        // Truncate long sub-service names to fit layout (accounting for indentation)
        let short_name = if name.len() > 18 {
@@ -188,19 +140,28 @@ impl ServicesWidget {
            name.to_string()
        };

-        // Get status icon and text, considering command status
-        let (icon, mut status_str, status_color) = self.get_service_icon_and_status(name, info, command_status);
+        // Get status icon and text
+        let icon = StatusIcons::get_icon(info.widget_status);
+        let status_color = match info.widget_status {
+            Status::Ok => Theme::success(),
+            Status::Inactive => Theme::muted_text(),
+            Status::Pending => Theme::highlight(),
+            Status::Warning => Theme::warning(),
+            Status::Critical => Theme::error(),
+            Status::Unknown => Theme::muted_text(),
+            Status::Offline => Theme::muted_text(),
+        };

-        // For sub-services, prefer latency if available (unless command is in progress)
-        if command_status.is_none() {
-            if let Some(latency) = info.latency_ms {
-                status_str = if latency < 0.0 {
-                    "timeout".to_string()
-                } else {
-                    format!("{:.0}ms", latency)
-                };
+        // For sub-services, prefer latency if available
+        let status_str = if let Some(latency) = info.latency_ms {
+            if latency < 0.0 {
+                "timeout".to_string()
+            } else {
+                format!("{:.0}ms", latency)
            }
-        }
+        } else {
+            info.status.clone()
+        };
        let tree_symbol = if is_last { "└─" } else { "├─" };

        vec![
@@ -250,13 +211,14 @@ impl ServicesWidget {
    /// Get currently selected service name (for actions)
    pub fn get_selected_service(&self) -> Option<String> {
        // Build the same display list to find the selected service
-        let mut display_lines: Vec<(String, Status, bool, Option<(ServiceInfo, bool)>)> = Vec::new();
+        let mut display_lines: Vec<(String, Status, bool, Option<(ServiceInfo, bool)>, String)> = Vec::new();
        
        let mut parent_services: Vec<_> = self.parent_services.iter().collect();
        parent_services.sort_by(|(a, _), (b, _)| a.cmp(b));

        for (parent_name, parent_info) in parent_services {
-            display_lines.push((parent_name.clone(), parent_info.widget_status, false, None));
+            let parent_line = self.format_parent_service_line(parent_name, parent_info);
+            display_lines.push((parent_line, parent_info.widget_status, false, None, parent_name.clone()));
            
            if let Some(sub_list) = self.sub_services.get(parent_name) {
                let mut sorted_subs = sub_list.clone();
@@ -264,17 +226,19 @@ impl ServicesWidget {
                
                for (i, (sub_name, sub_info)) in sorted_subs.iter().enumerate() {
                    let is_last_sub = i == sorted_subs.len() - 1;
+                    let full_sub_name = format!("{}_{}", parent_name, sub_name);
                    display_lines.push((
-                        format!("{}_{}", parent_name, sub_name), // Use parent_sub format for sub-services
+                        sub_name.clone(),
                        sub_info.widget_status,
                        true,
                        Some((sub_info.clone(), is_last_sub)),
+                        full_sub_name,
                    ));
                }
            }
        }
        
-        display_lines.get(self.selected_index).map(|(name, _, _, _)| name.clone())
+        display_lines.iter().filter(|line| !line.2).nth(self.selected_index).map(|line| line.4.clone()) // selected_index counts parent rows only, so skip sub-service rows
    }

    /// Get total count of selectable services (parent services only, not sub-services)
@@ -283,6 +247,7 @@ impl ServicesWidget {
        self.parent_services.len()
    }

+
    /// Calculate which parent service index corresponds to a display line index
    fn calculate_parent_service_index(&self, display_line_index: &usize) -> usize {
        // Build the same display list to map line index to parent service index
@@ -432,24 +397,13 @@ impl Widget for ServicesWidget {
        );
    }

-    fn render(&mut self, frame: &mut Frame, area: Rect) {
-        self.render_with_focus(frame, area, false);
-    }
 }

 impl ServicesWidget {
-    /// Render with optional focus indicator and scroll support
-    pub fn render_with_focus(&mut self, frame: &mut Frame, area: Rect, is_focused: bool) {
-        self.render_with_focus_and_scroll(frame, area, is_focused, 0);
-    }

-    /// Render with focus, scroll, and command status for visual feedback
-    pub fn render_with_command_status(&mut self, frame: &mut Frame, area: Rect, is_focused: bool, scroll_offset: usize, command_status: Option<&CommandStatus>) {
-        let services_block = if is_focused {
-            Components::focused_widget_block("services")
-        } else {
-            Components::widget_block("services")
-        };
+    /// Render with focus and scroll
+    pub fn render(&mut self, frame: &mut Frame, area: Rect, is_focused: bool, scroll_offset: usize) {
+        let services_block = Components::widget_block("services");
        let inner_area = services_block.inner(area);
        frame.render_widget(services_block, area);

@@ -473,13 +427,13 @@ impl ServicesWidget {
            return;
        }

-        // Use the existing render logic but with command status
-        self.render_services_with_status(frame, content_chunks[1], is_focused, scroll_offset, command_status);
+        // Render the services list
+        self.render_services(frame, content_chunks[1], is_focused, scroll_offset);
    }

-    /// Render services list with command status awareness
-    fn render_services_with_status(&mut self, frame: &mut Frame, area: Rect, is_focused: bool, scroll_offset: usize, command_status: Option<&CommandStatus>) {
-        // Build hierarchical service list for display (same as existing logic)
+    /// Render services list
+    fn render_services(&mut self, frame: &mut Frame, area: Rect, is_focused: bool, scroll_offset: usize) {
+        // Build hierarchical service list for display
        let mut display_lines: Vec<(String, Status, bool, Option<(ServiceInfo, bool)>)> = Vec::new();

        // Sort parent services alphabetically for consistent order
@@ -489,7 +443,7 @@ impl ServicesWidget {
        for (parent_name, parent_info) in parent_services {
            // Add parent service line
            let parent_line = self.format_parent_service_line(parent_name, parent_info);
-            display_lines.push((parent_line, parent_info.widget_status, false, None)); // false = not sub-service
+            display_lines.push((parent_line, parent_info.widget_status, false, None));

            // Add sub-services for this parent (if any)
            if let Some(sub_list) = self.sub_services.get(parent_name) {
@@ -551,47 +505,23 @@ impl ServicesWidget {
                };
                
                let mut spans = if *is_sub && sub_info.is_some() {
-                    // Use custom sub-service span creation WITH command status
+                    // Use custom sub-service span creation
                    let (service_info, is_last) = sub_info.as_ref().unwrap();
-                    self.create_sub_service_spans_with_status(line_text, service_info, *is_last, command_status)
+                    self.create_sub_service_spans(line_text, service_info, *is_last)
                } else {
-                    // Parent services - check if this parent service has a command in progress
-                    let service_spans = if let Some(status) = command_status {
-                        match status {
-                            CommandStatus::InProgress { target, .. } => {
-                                if target == line_text {
-                                    // Create spans with progress status
-                                    let (icon, status_text, status_color) = self.get_service_icon_and_status(line_text, &ServiceInfo { 
-                                        status: "".to_string(), 
-                                        memory_mb: None, 
-                                        disk_gb: None, 
-                                        latency_ms: None, 
-                                        widget_status: *line_status 
-                                    }, command_status);
-                                    vec![
-                                        ratatui::text::Span::styled(format!("{} ", icon), Style::default().fg(status_color)),
-                                        ratatui::text::Span::styled(line_text.clone(), Style::default().fg(Theme::primary_text())),
-                                        ratatui::text::Span::styled(format!(" {}", status_text), Style::default().fg(status_color)),
-                                    ]
-                                } else {
-                                    StatusIcons::create_status_spans(*line_status, line_text)
-                                }
-                            }
-                            _ => StatusIcons::create_status_spans(*line_status, line_text)
-                        }
-                    } else {
-                        StatusIcons::create_status_spans(*line_status, line_text)
-                    };
-                    service_spans
+                    // Parent services - use normal status spans
+                    StatusIcons::create_status_spans(*line_status, line_text)
                };
                
-                // Apply selection highlighting to parent services only, preserving status icon color
+                // Apply selection highlighting to parent services only
                // Only show selection when Services panel is focused
                if is_selected && !*is_sub && is_focused {
                    for (i, span) in spans.iter_mut().enumerate() {
                        if i == 0 {
-                            // First span is the status icon - preserve its color
-                            span.style = span.style.bg(Theme::highlight());
+                            // First span is the status icon - use background color for visibility against blue selection
+                            span.style = span.style
+                                .bg(Theme::highlight())
+                                .fg(Theme::background());
                        } else {
                            // Other spans (text) get full selection highlighting
                            span.style = span.style
@@ -635,167 +565,6 @@ impl ServicesWidget {
            }
        }
    }
-
-    /// Render with focus indicator and scroll offset
-    pub fn render_with_focus_and_scroll(&mut self, frame: &mut Frame, area: Rect, is_focused: bool, scroll_offset: usize) {
-        let services_block = if is_focused {
-            Components::focused_widget_block("services")
-        } else {
-            Components::widget_block("services")
-        };
-        let inner_area = services_block.inner(area);
-        frame.render_widget(services_block, area);
-
-        let content_chunks = Layout::default()
-            .direction(Direction::Vertical)
-            .constraints([Constraint::Length(1), Constraint::Min(0)])
-            .split(inner_area);
-
-        // Header
-        let header = format!(
-            "{:<25} {:<10} {:<8} {:<8}",
-            "Service:", "Status:", "RAM:", "Disk:"
-        );
-        let header_para = Paragraph::new(header).style(Typography::muted());
-        frame.render_widget(header_para, content_chunks[0]);
-
-        // Check if we have any services to display
-        if self.parent_services.is_empty() && self.sub_services.is_empty() {
-            let empty_text = Paragraph::new("No process data").style(Typography::muted());
-            frame.render_widget(empty_text, content_chunks[1]);
-            return;
-        }
-
-        // Build hierarchical service list for display
-        let mut display_lines: Vec<(String, Status, bool, Option<(ServiceInfo, bool)>)> = Vec::new();
-
-        // Sort parent services alphabetically for consistent order
-        let mut parent_services: Vec<_> = self.parent_services.iter().collect();
-        parent_services.sort_by(|(a, _), (b, _)| a.cmp(b));
-
-        for (parent_name, parent_info) in parent_services {
-            // Add parent service line
-            let parent_line = self.format_parent_service_line(parent_name, parent_info);
-            display_lines.push((parent_line, parent_info.widget_status, false, None)); // false = not sub-service
-
-            // Add sub-services for this parent (if any)
-            if let Some(sub_list) = self.sub_services.get(parent_name) {
-                // Sort sub-services by name for consistent display
-                let mut sorted_subs = sub_list.clone();
-                sorted_subs.sort_by(|(a, _), (b, _)| a.cmp(b));
-
-                for (i, (sub_name, sub_info)) in sorted_subs.iter().enumerate() {
-                    let is_last_sub = i == sorted_subs.len() - 1;
-                    // Store sub-service info for custom span rendering
-                    display_lines.push((
-                        sub_name.clone(),
-                        sub_info.widget_status,
-                        true,
-                        Some((sub_info.clone(), is_last_sub)),
-                    )); // true = sub-service, with is_last info
-                }
-            }
-        }
-
-        // Apply scroll offset and render visible lines
-        let available_lines = content_chunks[1].height as usize;
-        let total_lines = display_lines.len();
-        
-        // Calculate scroll boundaries
-        let max_scroll = if total_lines > available_lines {
-            total_lines - available_lines
-        } else {
-            total_lines.saturating_sub(1)
-        };
-        let effective_scroll = scroll_offset.min(max_scroll);
-        
-        // Get visible lines after scrolling
-        let visible_lines: Vec<_> = display_lines
-            .iter()
-            .skip(effective_scroll)
-            .take(available_lines)
-            .collect();
-        
-        let lines_to_show = visible_lines.len();
-
-        if lines_to_show > 0 {
-            let service_chunks = Layout::default()
-                .direction(Direction::Vertical)
-                .constraints(vec![Constraint::Length(1); lines_to_show])
-                .split(content_chunks[1]);
-
-            for (i, (line_text, line_status, is_sub, sub_info)) in visible_lines.iter().enumerate()
-            {
-                let actual_index = effective_scroll + i; // Real index in the full list
-                
-                // Only parent services can be selected - calculate parent service index
-                let is_selected = if !*is_sub {
-                    // This is a parent service - count how many parent services came before this one
-                    let parent_index = self.calculate_parent_service_index(&actual_index);
-                    parent_index == self.selected_index
-                } else {
-                    false // Sub-services are never selected
-                };
-                
-                let mut spans = if *is_sub && sub_info.is_some() {
-                    // Use custom sub-service span creation
-                    let (service_info, is_last) = sub_info.as_ref().unwrap();
-                    self.create_sub_service_spans(line_text, service_info, *is_last)
-                } else {
-                    // Use regular status spans for parent services
-                    StatusIcons::create_status_spans(*line_status, line_text)
-                };
-                
-                // Apply selection highlighting to parent services only, preserving status icon color
-                // Only show selection when Services panel is focused
-                if is_selected && !*is_sub && is_focused {
-                    for (i, span) in spans.iter_mut().enumerate() {
-                        if i == 0 {
-                            // First span is the status icon - preserve its color
-                            span.style = span.style.bg(Theme::highlight());
-                        } else {
-                            // Other spans (text) get full selection highlighting
-                            span.style = span.style
-                                .bg(Theme::highlight())
-                                .fg(Theme::background());
-                        }
-                    }
-                }
-                
-                let service_para = Paragraph::new(ratatui::text::Line::from(spans));
-                
-                frame.render_widget(service_para, service_chunks[i]);
-            }
-        }
-
-        // Show scroll indicator if there are more services than we can display
-        if total_lines > available_lines {
-            let hidden_above = effective_scroll;
-            let hidden_below = total_lines.saturating_sub(effective_scroll + available_lines);
-            
-            if hidden_above > 0 || hidden_below > 0 {
-                let scroll_text = if hidden_above > 0 && hidden_below > 0 {
-                    format!("... {} above, {} below", hidden_above, hidden_below)
-                } else if hidden_above > 0 {
-                    format!("... {} more above", hidden_above)
-                } else {
-                    format!("... {} more below", hidden_below)
-                };
-                
-                if available_lines > 0 && lines_to_show > 0 {
-                    let last_line_area = Rect {
-                        x: content_chunks[1].x,
-                        y: content_chunks[1].y + (lines_to_show - 1) as u16,
-                        width: content_chunks[1].width,
-                        height: 1,
-                    };
-                    
-                    let scroll_para = Paragraph::new(scroll_text).style(Typography::muted());
-                    frame.render_widget(scroll_para, last_line_area);
-                }
-            }
-        }
-    }
 }

 impl Default for ServicesWidget {
--- a/dashboard/src/ui/widgets/system.rs
+++ b/dashboard/src/ui/widgets/system.rs
@@ -15,7 +15,6 @@ pub struct SystemWidget {
    // NixOS information
    nixos_build: Option<String>,
    config_hash: Option<String>,
-    active_users: Option<String>,
    agent_hash: Option<String>,
    
    // CPU metrics
@@ -33,6 +32,7 @@ pub struct SystemWidget {
    tmp_used_gb: Option<f32>,
    tmp_total_gb: Option<f32>,
    memory_status: Status,
+    tmp_status: Status,
    
    // Storage metrics (collected from disk metrics)
    storage_pools: Vec<StoragePool>,
@@ -66,7 +66,6 @@ impl SystemWidget {
        Self {
            nixos_build: None,
            config_hash: None,
-            active_users: None,
            agent_hash: None,
            cpu_load_1min: None,
            cpu_load_5min: None,
@@ -80,6 +79,7 @@ impl SystemWidget {
            tmp_used_gb: None,
            tmp_total_gb: None,
            memory_status: Status::Unknown,
+            tmp_status: Status::Unknown,
            storage_pools: Vec::new(),
            has_data: false,
        }
@@ -129,7 +129,7 @@ impl SystemWidget {
    }

    /// Get the current agent hash for rebuild completion detection
-    pub fn get_agent_hash(&self) -> Option<&String> {
+    pub fn _get_agent_hash(&self) -> Option<&String> {
        self.agent_hash.as_ref()
    }

@@ -230,9 +230,30 @@ impl SystemWidget {

    /// Extract pool name from disk metric name
    fn extract_pool_name(&self, metric_name: &str) -> Option<String> {
-        if let Some(captures) = metric_name.strip_prefix("disk_") {
-            if let Some(pos) = captures.find('_') {
-                return Some(captures[..pos].to_string());
+        // Pattern: disk_{pool_name}_{drive_name}_{metric_type}
+        // Since pool_name can contain underscores, work backwards from known metric suffixes
+        if metric_name.starts_with("disk_") {
+            // First try drive-specific metrics that have device names
+            if let Some(suffix_pos) = metric_name.rfind("_temperature")
+                .or_else(|| metric_name.rfind("_wear_percent"))
+                .or_else(|| metric_name.rfind("_health")) {
+                // The last underscore before the suffix separates the pool name from the drive name
+                let before_suffix = &metric_name[..suffix_pos];
+                if let Some(drive_start) = before_suffix.rfind('_') {
+                    return metric_name.get(5..drive_start).map(str::to_string); // Skip "disk_"; checked slice yields None (no panic) when there is no pool segment
+                }
+            }
+            // For pool-level metrics (usage_percent, used_gb, total_gb), take everything before the metric suffix
+            else if let Some(suffix_pos) = metric_name.rfind("_usage_percent")
+                .or_else(|| metric_name.rfind("_used_gb"))
+                .or_else(|| metric_name.rfind("_total_gb")) {
+                return metric_name.get(5..suffix_pos).map(str::to_string); // Skip "disk_"; checked slice yields None (no panic) for a bare "disk_<metric>" name
+            }
+            // Fallback to old behavior for unknown patterns
+            else if let Some(captures) = metric_name.strip_prefix("disk_") {
+                if let Some(pos) = captures.find('_') {
+                    return Some(captures[..pos].to_string());
+                }
             }
         }
        None
@@ -240,16 +261,24 @@ impl SystemWidget {

    /// Extract drive name from disk metric name  
    fn extract_drive_name(&self, metric_name: &str) -> Option<String> {
-        // Pattern: disk_pool_drive_metric
-        let parts: Vec<&str> = metric_name.split('_').collect();
-        if parts.len() >= 3 && parts[0] == "disk" {
-            return Some(parts[2].to_string());
+        // Pattern: disk_{pool_name}_{drive_name}_{metric_type}
+        // Since pool_name can contain underscores, work backwards from known metric suffixes
+        if metric_name.starts_with("disk_") {
+            if let Some(suffix_pos) = metric_name.rfind("_temperature")
+                .or_else(|| metric_name.rfind("_wear_percent"))
+                .or_else(|| metric_name.rfind("_health")) {
+                // Find the second-to-last underscore to get the drive name
+                let before_suffix = &metric_name[..suffix_pos];
+                if let Some(drive_start) = before_suffix.rfind('_') {
+                    return Some(before_suffix[drive_start + 1..].to_string());
+                }
+            }
        }
        None
    }

    /// Render storage section with tree structure
-    fn render_storage(&self) -> Vec<Line> {
+    fn render_storage(&self) -> Vec<Line<'_>> {
        let mut lines = Vec::new();

        for pool in &self.storage_pools {
@@ -334,14 +363,9 @@ impl Widget for SystemWidget {
                        self.config_hash = Some(hash.clone());
                    }
                }
-                "system_active_users" => {
-                    if let MetricValue::String(users) = &metric.value {
-                        self.active_users = Some(users.clone());
-                    }
-                }
-                "system_agent_hash" => {
-                    if let MetricValue::String(hash) = &metric.value {
-                        self.agent_hash = Some(hash.clone());
+                "agent_version" => {
+                    if let MetricValue::String(version) = &metric.value {
+                        self.agent_hash = Some(version.clone());
                    }
                }
                
@@ -390,6 +414,7 @@ impl Widget for SystemWidget {
                "memory_tmp_usage_percent" => {
                    if let MetricValue::Float(usage) = metric.value {
                        self.tmp_usage_percent = Some(usage);
+                        self.tmp_status = metric.status.clone();
                    }
                }
                "memory_tmp_used_gb" => {
@@ -410,40 +435,38 @@ impl Widget for SystemWidget {
        self.update_storage_from_metrics(metrics);
    }

-    fn render(&mut self, frame: &mut Frame, area: Rect) {
-        self.render_with_scroll(frame, area, 0);
-    }
 }

 impl SystemWidget {
    /// Render with scroll offset support
-    pub fn render_with_scroll(&mut self, frame: &mut Frame, area: Rect, scroll_offset: usize) {
+    pub fn render_with_scroll(&mut self, frame: &mut Frame, area: Rect, scroll_offset: usize, hostname: &str, config: Option<&crate::config::DashboardConfig>) {
        let mut lines = Vec::new();

        // NixOS section
        lines.push(Line::from(vec![
-            Span::styled("NixOS:", Typography::widget_title())
+            Span::styled(format!("NixOS {}:", hostname), Typography::widget_title())
        ]));
        
        let build_text = self.nixos_build.as_deref().unwrap_or("unknown");
        lines.push(Line::from(vec![
            Span::styled(format!("Build: {}", build_text), Typography::secondary())
        ]));
-
-        let config_text = self.config_hash.as_deref().unwrap_or("unknown");
+        
+        let agent_version_text = self.agent_hash.as_deref().unwrap_or("unknown");
        lines.push(Line::from(vec![
-            Span::styled(format!("Config: {}", config_text), Typography::secondary())
-        ]));
-
-        let agent_hash_text = self.agent_hash.as_deref().unwrap_or("unknown");
-        let short_hash = if agent_hash_text.len() > 8 && agent_hash_text != "unknown" {
-            &agent_hash_text[..8]
-        } else {
-            agent_hash_text
-        };
-        lines.push(Line::from(vec![
-            Span::styled(format!("Agent: {}", short_hash), Typography::secondary())
+            Span::styled(format!("Agent: {}", agent_version_text), Typography::secondary())
        ]));
+        
+        // Display detected connection IP
+        if let Some(config) = config {
+            if let Some(host_details) = config.hosts.get(hostname) {
+                let detected_ip = host_details.get_connection_ip(hostname);
+                lines.push(Line::from(vec![
+                    Span::styled(format!("IP: {}", detected_ip), Typography::secondary())
+                ]));
+            }
+        }
+        

        // CPU section
        lines.push(Line::from(vec![
@@ -480,7 +503,7 @@ impl SystemWidget {
            Span::styled("  └─ ", Typography::tree()),
        ];
        tmp_spans.extend(StatusIcons::create_status_spans(
-            self.memory_status.clone(),
+            self.tmp_status.clone(),
            &format!("/tmp: {}", tmp_text)
        ));
        lines.push(Line::from(tmp_spans));
--- a/shared/Cargo.toml
+++ b/shared/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cm-dashboard-shared"
-version = "0.1.0"
+version = "0.1.78"
 edition = "2021"

 [dependencies]
--- a/shared/src/metrics.rs
+++ b/shared/src/metrics.rs
@@ -83,10 +83,12 @@ impl MetricValue {
 #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
 pub enum Status {
    Ok,
+    Inactive, // Derived Ord ranks this just above Ok (not equal to it); NOTE(review): confirm aggregation treats it like Ok
+    Unknown,
+    Offline,
    Pending,
    Warning,
    Critical,
-    Unknown,
 }

 impl Status {
@@ -180,6 +182,16 @@ impl HysteresisThresholds {
                    Status::Ok
                }
            }
+            Status::Inactive => {
+                // Inactive services use normal thresholds like first measurement
+                if value >= self.critical_high {
+                    Status::Critical
+                } else if value >= self.warning_high {
+                    Status::Warning
+                } else {
+                    Status::Ok
+                }
+            }
            Status::Pending => {
                // Service transitioning, use normal thresholds like first measurement
                if value >= self.critical_high {
@@ -190,6 +202,16 @@ impl HysteresisThresholds {
                    Status::Ok
                }
            }
+            Status::Offline => {
+                // Host coming back online, use normal thresholds like first measurement
+                if value >= self.critical_high {
+                    Status::Critical
+                } else if value >= self.warning_high {
+                    Status::Warning
+                } else {
+                    Status::Ok
+                }
+            }
        }
    }
 }
--- a/shared/src/protocol.rs
+++ b/shared/src/protocol.rs
@@ -9,6 +9,17 @@ pub struct MetricMessage {
    pub metrics: Vec<Metric>,
 }

+/// Command output streaming message
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CommandOutputMessage {
+    pub hostname: String,
+    pub command_id: String,
+    pub command_type: String,
+    pub output_line: String,
+    pub is_complete: bool,
+    pub timestamp: u64,
+}
+
 impl MetricMessage {
    pub fn new(hostname: String, metrics: Vec<Metric>) -> Self {
        Self {
@@ -19,6 +30,19 @@ impl MetricMessage {
    }
 }

+impl CommandOutputMessage {
+    pub fn new(hostname: String, command_id: String, command_type: String, output_line: String, is_complete: bool) -> Self {
+        Self {
+            hostname,
+            command_id,
+            command_type,
+            output_line,
+            is_complete,
+            timestamp: chrono::Utc::now().timestamp() as u64,
+        }
+    }
+}
+
 /// Commands that can be sent from dashboard to agent
 #[derive(Debug, Serialize, Deserialize)]
 pub enum Command {
@@ -55,6 +79,7 @@ pub enum MessageType {
    Metrics,
    Command,
    CommandResponse,
+    CommandOutput,
    Heartbeat,
 }

@@ -80,6 +105,13 @@ impl MessageEnvelope {
        })
    }

+    pub fn command_output(message: CommandOutputMessage) -> Result<Self, crate::SharedError> {
+        Ok(Self {
+            message_type: MessageType::CommandOutput,
+            payload: serde_json::to_vec(&message)?,
+        })
+    }
+
    pub fn heartbeat() -> Result<Self, crate::SharedError> {
        Ok(Self {
            message_type: MessageType::Heartbeat,
@@ -113,4 +145,13 @@ impl MessageEnvelope {
            }),
        }
    }
+
+    pub fn decode_command_output(&self) -> Result<CommandOutputMessage, crate::SharedError> {
+        match self.message_type {
+            MessageType::CommandOutput => Ok(serde_json::from_slice(&self.payload)?),
+            _ => Err(crate::SharedError::Protocol {
+                message: "Expected command output message".to_string(),
+            }),
+        }
+    }
 }