commit 11896919e41945c59750937a636539eafce7d4fb Author: Spencer Grimes Date: Mon Feb 2 21:04:02 2026 -0600 Initial commit: Sentry-Emote system monitor - Aggregator: Flask-based event broker with priority queue - Frontend: OLED-optimized UI with animations - Detectors: disk, cpu, memory, service, network - Unified entry point (sentry.py) with process management - Heartbeat TTL system for auto-clearing stale events Co-Authored-By: Claude Opus 4.5 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6c7ec55 --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# Virtual environment +venv/ + +# Python +__pycache__/ +*.py[cod] +*.egg-info/ + +# Runtime files +status.json + +# IDE +.vscode/ +.idea/ + +# OS +.DS_Store +Thumbs.db + +# Claude Code +.claude/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..60cfbef --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,121 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Sentry-Emote is a minimalist system status monitor designed for an old Pixel phone used as an ambient display. It uses ASCII "emotes" to represent system health instead of complex graphs. 
+ +## Architecture + +**Publisher/Subscriber model:** + +``` +┌─────────────┐ POST /event ┌─────────────┐ GET /status ┌─────────────┐ +│ Detectors │ ──────────────────▶ │ Aggregator │ ◀────────────────── │ Emote-UI │ +│ (sensors) │ │ (broker) │ │ (display) │ +└─────────────┘ └─────────────┘ └─────────────┘ +``` + +- **Aggregator** (`aggregator.py`) — Flask service managing the event queue and priority logic +- **Detectors** (`detectors/*.py`) — Independent scripts monitoring system metrics +- **Emote-UI** (`index.html`) — OLED-optimized web frontend +- **Sentry** (`sentry.py`) — Unified entry point managing all processes + +## Quick Start + +```bash +# Setup +python -m venv venv +source venv/bin/activate # or .\venv\Scripts\activate on Windows +pip install -r requirements.txt + +# Run everything +python sentry.py +``` + +UI available at http://localhost:5000 + +## Configuration + +Edit `config.json` to configure the aggregator URL, enable/disable detectors, and set thresholds. + +```json +{ + "aggregator_url": "http://localhost:5000", + "aggregator": { "script": "aggregator.py" }, + "detectors": [ + { + "name": "cpu", + "enabled": true, + "script": "detectors/cpu.py", + "env": { + "CHECK_INTERVAL": "30", + "THRESHOLD_WARNING": "85", + "THRESHOLD_CRITICAL": "95" + } + } + ] +} +``` + +## Detectors + +| Detector | Script | Required Env Vars | +|----------|--------|-------------------| +| Disk Space | `detectors/disk_space.py` | — | +| CPU | `detectors/cpu.py` | — | +| Memory | `detectors/memory.py` | — | +| Service | `detectors/service.py` | `SERVICES` (comma-separated process names) | +| Network | `detectors/network.py` | `HOSTS` (comma-separated hostnames/IPs) | + +All detectors support: `AGGREGATOR_URL`, `CHECK_INTERVAL`, `THRESHOLD_WARNING`, `THRESHOLD_CRITICAL` + +## API Endpoints + +- `POST /event` — Register event: `{"id": "name", "priority": 1-4, "message": "optional", "ttl": optional_seconds}` +- `POST /clear` — Clear event: `{"id": "name"}` +- `GET /status` 
— Current state JSON +- `GET /events` — List active events + +## Priority System + +Lower number = higher priority. Events with a `ttl` auto-expire (heartbeat pattern). + +| Priority | State | Emote | Color | Behavior | +|----------|----------|----------|--------|----------| +| 1 | Critical | `( x_x)` | Red | Shaking animation | +| 2 | Warning | `( o_o)` | Yellow | Breathing animation | +| 3 | Notify | `( 'o')` | Blue | Popping animation, 10s default TTL | +| 4 | Optimal | `( ^_^)` | Green | Default when no events | + +## Testing Events + +```bash +# Warning with 30s TTL +curl -X POST -H "Content-Type: application/json" \ + -d '{"id":"test","priority":2,"message":"Test warning","ttl":30}' \ + http://localhost:5000/event + +# Clear manually +curl -X POST -H "Content-Type: application/json" \ + -d '{"id":"test"}' \ + http://localhost:5000/clear +``` + +## File Structure + +``` +├── sentry.py # Unified entry point +├── aggregator.py # Event broker/API server +├── index.html # OLED-optimized frontend +├── config.json # Runtime configuration +├── detectors/ +│ ├── disk_space.py +│ ├── cpu.py +│ ├── memory.py +│ ├── service.py +│ └── network.py +├── requirements.txt +└── SPEC.md # Original project specification +``` diff --git a/SPEC.md b/SPEC.md new file mode 100644 index 0000000..45f37bf --- /dev/null +++ b/SPEC.md @@ -0,0 +1,79 @@ +# SPEC.md: Project "Sentry-Emote" + +## 1. Overview + +**Purpose:** Repurpose an old Pixel phone (OLED screen) as an ambient, glanceable system status monitor for a home server. +**Design Philosophy:** Minimalist, binary-state, and high-signal. Use an "Emote" (ASCII/Emoji) to represent system health instead of complex graphs. +**Target Device:** Android Pixel (accessed via Fully Kiosk Browser). + +## 2. System Architecture + +The system follows a decoupled **Publisher/Subscriber** model to ensure extensibility. + +- **Aggregator (The Broker):** A central Python service running on the server. 
It manages the event queue and generates the state. +- **Detectors (The Publishers):** Independent scripts (Python, Bash, etc.) that monitor specific system metrics and "hook" into the Aggregator. +- **Emote-UI (The Subscriber):** A mobile-optimized web frontend that displays the current highest-priority emote. + +## 3. Data Specification + +### 3.1 `status.json` (State Registry) + +The Aggregator outputs this file every time the state changes. + +```json +{ + "current_state": "optimal", + "active_emote": "( ^_^)", + "color": "#00FF00", + "animation": "breathing", + "message": "All systems nominal", + "active_events": [ + { + "id": "disk_check", + "priority": 4, + "message": "Disk 40% full" + } + ], + "last_updated": "2026-02-02T17:30:00" +} +``` + +### 3.2 Priority Hierarchy + +| Level | Name | Priority | Emote | Color | Logic | +| ----- | ------------ | --------- | -------- | ------ | ---------------------------------------- | +| **1** | **Critical** | Emergency | `( x_x)` | Red | Overrules all. Manual clear required. | +| **2** | **Warning** | Caution | `( o_o)` | Yellow | Overrules Optimal. Auto-clears if fixed. | +| **3** | **Notify** | Event | `( 'o')` | Blue | Transient. TTL (Time To Live) of 10s. | +| **4** | **Optimal** | Default | `( ^_^)` | Green | Active when no other events exist. | + +## 4. Component Requirements + +### 4.1 Aggregator (`aggregator.py`) + +- **Event Bus:** Accept HTTP POST requests or watch a specific file/directory for new event signals. +- **State Management:** Maintain a list of "Active Events." +- **TTL Logic:** Automatically remove Priority 3 events after 10 seconds. +- **Deduplication:** If multiple events exist, always select the one with the lowest priority number for the `active_emote` field. + +### 4.2 Emote-UI (`index.html`) + +- **OLED Optimization:** Pure black background (`#000000`). +- **Glanceability:** Massive centered text for the emote. +- **Animations:** - `breathing`: Slow opacity/scale pulse. 
+- `shaking`: Rapid X-axis jitter for Critical. +- `popping`: Scale-up effect for Notifications. + +- **Refresh:** Long-polling or `setInterval` every 2 seconds. + +### 4.3 Extensibility (The Hook System) + +- New detectors must be able to send an event to the Aggregator without modifying the core code. +- Example Detector Hook: `curl -X POST -d '{"id":"ssh","priority":1}' http://localhost:5000/event` + +## 5. Implementation Roadmap + +1. **Phase 1:** Build the `aggregator.py` with basic JSON output. +2. **Phase 2:** Build the OLED-friendly `index.html` frontend. +3. **Phase 3:** Create the first "Detector" (e.g., a simple disk space checker). +4. **Phase 4:** Implement TTL for transient notifications. diff --git a/aggregator.py b/aggregator.py new file mode 100644 index 0000000..86bc034 --- /dev/null +++ b/aggregator.py @@ -0,0 +1,177 @@ +""" +Sentry-Emote Aggregator +A lightweight event broker that manages priority-based system status. +""" + +import json +import threading +import time +from datetime import datetime +from pathlib import Path +from flask import Flask, request, jsonify, send_from_directory + +app = Flask(__name__, static_folder=".") +ROOT_DIR = Path(__file__).parent + +# Configuration +STATUS_FILE = Path(__file__).parent / "status.json" +DEFAULT_NOTIFY_TTL = 10 # Default TTL for Priority 3 (Notify) events + +# Priority definitions +PRIORITY_CONFIG = { + 1: {"name": "Critical", "emote": "( x_x)", "color": "#FF0000", "animation": "shaking"}, + 2: {"name": "Warning", "emote": "( o_o)", "color": "#FFFF00", "animation": "breathing"}, + 3: {"name": "Notify", "emote": "( 'o')", "color": "#0088FF", "animation": "popping"}, + 4: {"name": "Optimal", "emote": "( ^_^)", "color": "#00FF00", "animation": "breathing"}, +} + +# Thread-safe event storage +events_lock = threading.Lock() +active_events = {} # id -> {priority, message, timestamp, ttl} + + +def get_current_state(): + """Determine current state based on active events.""" + with events_lock: + if not 
active_events: + priority = 4 + events_list = [] + else: + # Find highest priority (lowest number) + priority = min(e["priority"] for e in active_events.values()) + events_list = [ + {"id": eid, "priority": e["priority"], "message": e.get("message", "")} + for eid, e in active_events.items() + ] + + config = PRIORITY_CONFIG[priority] + return { + "current_state": config["name"].lower(), + "active_emote": config["emote"], + "color": config["color"], + "animation": config["animation"], + "message": config["name"] if priority == 4 else f"{config['name']} state active", + "active_events": sorted(events_list, key=lambda x: x["priority"]), + "last_updated": datetime.now().isoformat(timespec="seconds"), + } + + +def write_status(): + """Write current state to status.json.""" + state = get_current_state() + with open(STATUS_FILE, "w") as f: + json.dump(state, f, indent="\t") + return state + + +def cleanup_expired_events(): + """Background thread to remove expired TTL events.""" + while True: + time.sleep(1) + now = time.time() + expired = [] + + with events_lock: + for eid, event in active_events.items(): + if event.get("ttl") and now > event["ttl"]: + expired.append(eid) + + for eid in expired: + del active_events[eid] + + if expired: + write_status() + + +@app.route("/event", methods=["POST"]) +def post_event(): + """ + Accept a new event. + Expected JSON: {"id": "event_id", "priority": 1-4, "message": "optional", "ttl": optional_seconds} + """ + data = request.get_json(force=True) + + if not data or "id" not in data or "priority" not in data: + return jsonify({"error": "Missing required fields: id, priority"}), 400 + + event_id = str(data["id"]) + priority = int(data["priority"]) + + if priority not in PRIORITY_CONFIG: + return jsonify({"error": f"Invalid priority: {priority}. 
Must be 1-4."}), 400 + + event = { + "priority": priority, + "message": data.get("message", ""), + "timestamp": time.time(), + } + + # Apply TTL if provided, or use default for Priority 3 (Notify) + if "ttl" in data: + event["ttl"] = time.time() + int(data["ttl"]) + elif priority == 3: + event["ttl"] = time.time() + DEFAULT_NOTIFY_TTL + + with events_lock: + active_events[event_id] = event + + state = write_status() + return jsonify({"status": "ok", "current_state": state}), 200 + + +@app.route("/clear", methods=["POST"]) +def clear_event(): + """ + Clear an event by ID. + Expected JSON: {"id": "event_id"} + """ + data = request.get_json(force=True) + + if not data or "id" not in data: + return jsonify({"error": "Missing required field: id"}), 400 + + event_id = str(data["id"]) + + with events_lock: + if event_id in active_events: + del active_events[event_id] + state = write_status() + return jsonify({"status": "cleared", "current_state": state}), 200 + else: + return jsonify({"error": "Event not found"}), 404 + + +@app.route("/") +def index(): + """Serve the frontend.""" + return send_from_directory(ROOT_DIR, "index.html") + + +@app.route("/status", methods=["GET"]) +def get_status(): + """Return current status as JSON.""" + return jsonify(get_current_state()), 200 + + +@app.route("/events", methods=["GET"]) +def list_events(): + """List all active events.""" + with events_lock: + return jsonify({"events": dict(active_events)}), 200 + + +def main(): + # Write initial optimal state + write_status() + print(f"Status file: {STATUS_FILE}") + + # Start TTL cleanup thread + cleanup_thread = threading.Thread(target=cleanup_expired_events, daemon=True) + cleanup_thread.start() + + # Run Flask + app.run(host="0.0.0.0", port=5000, threaded=True) + + +if __name__ == "__main__": + main() diff --git a/config.json b/config.json new file mode 100644 index 0000000..60841db --- /dev/null +++ b/config.json @@ -0,0 +1,57 @@ +{ + "aggregator_url": "http://localhost:5000", + 
"aggregator": { + "script": "aggregator.py" + }, + "detectors": [ + { + "name": "disk_space", + "enabled": true, + "script": "detectors/disk_space.py", + "env": { + "CHECK_INTERVAL": "300", + "THRESHOLD_WARNING": "85", + "THRESHOLD_CRITICAL": "95" + } + }, + { + "name": "cpu", + "enabled": true, + "script": "detectors/cpu.py", + "env": { + "CHECK_INTERVAL": "30", + "THRESHOLD_WARNING": "85", + "THRESHOLD_CRITICAL": "95" + } + }, + { + "name": "memory", + "enabled": true, + "script": "detectors/memory.py", + "env": { + "CHECK_INTERVAL": "30", + "THRESHOLD_WARNING": "85", + "THRESHOLD_CRITICAL": "95" + } + }, + { + "name": "service", + "enabled": false, + "script": "detectors/service.py", + "env": { + "CHECK_INTERVAL": "30", + "SERVICES": "nginx,postgres" + } + }, + { + "name": "network", + "enabled": false, + "script": "detectors/network.py", + "env": { + "CHECK_INTERVAL": "60", + "HOSTS": "8.8.8.8,google.com", + "TIMEOUT": "5" + } + } + ] +} diff --git a/detectors/cpu.py b/detectors/cpu.py new file mode 100644 index 0000000..7c94959 --- /dev/null +++ b/detectors/cpu.py @@ -0,0 +1,83 @@ +""" +CPU Usage Detector +Monitors CPU usage and reports to the aggregator when thresholds are exceeded. 
+ +Environment variables: + AGGREGATOR_URL - URL of the aggregator (default: http://localhost:5000) + CHECK_INTERVAL - Seconds between checks (default: 30) + THRESHOLD_CRITICAL - Percent usage for critical alert (default: 95) + THRESHOLD_WARNING - Percent usage for warning alert (default: 85) +""" + +import os +import time +import psutil +import requests + +# Configuration from environment +AGGREGATOR_URL = os.environ.get("AGGREGATOR_URL", "http://localhost:5000") +CHECK_INTERVAL = int(os.environ.get("CHECK_INTERVAL", 30)) +THRESHOLD_CRITICAL = int(os.environ.get("THRESHOLD_CRITICAL", 95)) +THRESHOLD_WARNING = int(os.environ.get("THRESHOLD_WARNING", 85)) + +EVENT_ID = "cpu_usage" + + +def send_event(priority, message): + """Send an event to the aggregator with heartbeat TTL.""" + ttl = CHECK_INTERVAL * 2 + try: + response = requests.post( + f"{AGGREGATOR_URL}/event", + json={"id": EVENT_ID, "priority": priority, "message": message, "ttl": ttl}, + timeout=5 + ) + print(f"[EVENT] {message} (priority {priority}, ttl {ttl}s) -> {response.status_code}") + except requests.RequestException as e: + print(f"[ERROR] Failed to send event: {e}") + + +def clear_event(): + """Clear the event from the aggregator.""" + try: + response = requests.post( + f"{AGGREGATOR_URL}/clear", + json={"id": EVENT_ID}, + timeout=5 + ) + if response.status_code == 200: + print(f"[CLEAR] {EVENT_ID}") + except requests.RequestException as e: + print(f"[ERROR] Failed to clear event: {e}") + + +def main(): + print(f"CPU Usage Detector started") + print(f" Aggregator: {AGGREGATOR_URL}") + print(f" Interval: {CHECK_INTERVAL}s") + print(f" Thresholds: Warning={THRESHOLD_WARNING}%, Critical={THRESHOLD_CRITICAL}%") + print() + + alert_active = False + + while True: + # Get CPU usage over a 1-second sample + cpu_percent = psutil.cpu_percent(interval=1) + + if cpu_percent >= THRESHOLD_CRITICAL: + send_event(1, f"CPU at {cpu_percent:.0f}%") + alert_active = True + elif cpu_percent >= THRESHOLD_WARNING: + 
send_event(2, f"CPU at {cpu_percent:.0f}%") + alert_active = True + else: + print(f"[OK] CPU: {cpu_percent:.0f}%") + if alert_active: + clear_event() + alert_active = False + + time.sleep(CHECK_INTERVAL - 1) # Account for 1s sample time + + +if __name__ == "__main__": + main() diff --git a/detectors/disk_space.py b/detectors/disk_space.py new file mode 100644 index 0000000..45796c4 --- /dev/null +++ b/detectors/disk_space.py @@ -0,0 +1,159 @@ +""" +Disk Space Detector +Monitors all drives and reports to the aggregator when thresholds are exceeded. + +Environment variables: + AGGREGATOR_URL - URL of the aggregator (default: http://localhost:5000) + CHECK_INTERVAL - Seconds between checks (default: 300) + THRESHOLD_CRITICAL - Percent usage for critical alert (default: 95) + THRESHOLD_WARNING - Percent usage for warning alert (default: 85) +""" + +import os +import time +import shutil +import requests + +# Configuration from environment +AGGREGATOR_URL = os.environ.get("AGGREGATOR_URL", "http://localhost:5000") +CHECK_INTERVAL = int(os.environ.get("CHECK_INTERVAL", 300)) +THRESHOLD_CRITICAL = int(os.environ.get("THRESHOLD_CRITICAL", 95)) +THRESHOLD_WARNING = int(os.environ.get("THRESHOLD_WARNING", 85)) + + +def get_all_drives(): + """Get list of mounted drives/partitions.""" + import platform + drives = [] + + if platform.system() == "Windows": + import string + for letter in string.ascii_uppercase: + drive = f"{letter}:\\" + try: + shutil.disk_usage(drive) + drives.append(drive) + except (FileNotFoundError, PermissionError, OSError): + pass + else: + # Linux/macOS - parse /proc/mounts or /etc/mtab for real filesystems + seen_devices = set() + try: + with open("/proc/mounts", "r") as f: + for line in f: + parts = line.split() + if len(parts) < 2: + continue + device, mount = parts[0], parts[1] + # Skip virtual filesystems + if not device.startswith("/dev/"): + continue + # Skip duplicate devices (e.g., bind mounts) + if device in seen_devices: + continue + 
seen_devices.add(device) + try: + shutil.disk_usage(mount) + drives.append(mount) + except (FileNotFoundError, PermissionError, OSError): + pass + except FileNotFoundError: + # Fallback for macOS or systems without /proc/mounts + for mount in ["/", "/home", "/var"]: + if os.path.exists(mount): + try: + shutil.disk_usage(mount) + drives.append(mount) + except (FileNotFoundError, PermissionError, OSError): + pass + + return drives + + +def check_disk(drive): + """Check disk usage for a drive. Returns (percent_used, total_gb, used_gb).""" + try: + usage = shutil.disk_usage(drive) + total_gb = usage.total / (1024 ** 3) + used_gb = usage.used / (1024 ** 3) + percent = (usage.used / usage.total) * 100 + return percent, total_gb, used_gb + except Exception: + return None, None, None + + +def send_event(event_id, priority, message): + """Send an event to the aggregator with heartbeat TTL.""" + ttl = CHECK_INTERVAL * 2 # Event expires if not refreshed + try: + response = requests.post( + f"{AGGREGATOR_URL}/event", + json={"id": event_id, "priority": priority, "message": message, "ttl": ttl}, + timeout=5 + ) + print(f"[EVENT] {event_id}: {message} (priority {priority}, ttl {ttl}s) -> {response.status_code}") + except requests.RequestException as e: + print(f"[ERROR] Failed to send event: {e}") + + +def clear_event(event_id): + """Clear an event from the aggregator.""" + try: + response = requests.post( + f"{AGGREGATOR_URL}/clear", + json={"id": event_id}, + timeout=5 + ) + if response.status_code == 200: + print(f"[CLEAR] {event_id}") + except requests.RequestException as e: + print(f"[ERROR] Failed to clear event: {e}") + + +def main(): + print(f"Disk Space Detector started") + print(f" Aggregator: {AGGREGATOR_URL}") + print(f" Interval: {CHECK_INTERVAL}s") + print(f" Thresholds: Warning={THRESHOLD_WARNING}%, Critical={THRESHOLD_CRITICAL}%") + print() + + # Track active alerts to know when to clear + active_alerts = set() + + while True: + drives = get_all_drives() + 
print(f"[CHECK] Scanning {len(drives)} drive(s)...") + + current_alerts = set() + + for drive in drives: + percent, total_gb, used_gb = check_disk(drive) + if percent is None: + continue + + # Create a clean event ID from drive path + event_id = f"disk_{drive.replace(':', '').replace('/', '_').replace('\\', '').strip('_') or 'root'}" + + if percent >= THRESHOLD_CRITICAL: + message = f"{drive} at {percent:.0f}% ({used_gb:.1f}/{total_gb:.1f} GB)" + send_event(event_id, 1, message) + current_alerts.add(event_id) + elif percent >= THRESHOLD_WARNING: + message = f"{drive} at {percent:.0f}% ({used_gb:.1f}/{total_gb:.1f} GB)" + send_event(event_id, 2, message) + current_alerts.add(event_id) + else: + print(f"[OK] {drive}: {percent:.0f}%") + + # Clear alerts that are no longer active + for event_id in active_alerts - current_alerts: + clear_event(event_id) + + active_alerts = current_alerts + + print(f"[SLEEP] Next check in {CHECK_INTERVAL}s\n") + time.sleep(CHECK_INTERVAL) + + +if __name__ == "__main__": + main() diff --git a/detectors/memory.py b/detectors/memory.py new file mode 100644 index 0000000..8f3b556 --- /dev/null +++ b/detectors/memory.py @@ -0,0 +1,85 @@ +""" +Memory Usage Detector +Monitors RAM usage and reports to the aggregator when thresholds are exceeded. 
+ +Environment variables: + AGGREGATOR_URL - URL of the aggregator (default: http://localhost:5000) + CHECK_INTERVAL - Seconds between checks (default: 30) + THRESHOLD_CRITICAL - Percent usage for critical alert (default: 95) + THRESHOLD_WARNING - Percent usage for warning alert (default: 85) +""" + +import os +import time +import psutil +import requests + +# Configuration from environment +AGGREGATOR_URL = os.environ.get("AGGREGATOR_URL", "http://localhost:5000") +CHECK_INTERVAL = int(os.environ.get("CHECK_INTERVAL", 30)) +THRESHOLD_CRITICAL = int(os.environ.get("THRESHOLD_CRITICAL", 95)) +THRESHOLD_WARNING = int(os.environ.get("THRESHOLD_WARNING", 85)) + +EVENT_ID = "memory_usage" + + +def send_event(priority, message): + """Send an event to the aggregator with heartbeat TTL.""" + ttl = CHECK_INTERVAL * 2 + try: + response = requests.post( + f"{AGGREGATOR_URL}/event", + json={"id": EVENT_ID, "priority": priority, "message": message, "ttl": ttl}, + timeout=5 + ) + print(f"[EVENT] {message} (priority {priority}, ttl {ttl}s) -> {response.status_code}") + except requests.RequestException as e: + print(f"[ERROR] Failed to send event: {e}") + + +def clear_event(): + """Clear the event from the aggregator.""" + try: + response = requests.post( + f"{AGGREGATOR_URL}/clear", + json={"id": EVENT_ID}, + timeout=5 + ) + if response.status_code == 200: + print(f"[CLEAR] {EVENT_ID}") + except requests.RequestException as e: + print(f"[ERROR] Failed to clear event: {e}") + + +def main(): + print(f"Memory Usage Detector started") + print(f" Aggregator: {AGGREGATOR_URL}") + print(f" Interval: {CHECK_INTERVAL}s") + print(f" Thresholds: Warning={THRESHOLD_WARNING}%, Critical={THRESHOLD_CRITICAL}%") + print() + + alert_active = False + + while True: + mem = psutil.virtual_memory() + mem_percent = mem.percent + used_gb = mem.used / (1024 ** 3) + total_gb = mem.total / (1024 ** 3) + + if mem_percent >= THRESHOLD_CRITICAL: + send_event(1, f"Memory at {mem_percent:.0f}% 
({used_gb:.1f}/{total_gb:.1f} GB)") + alert_active = True + elif mem_percent >= THRESHOLD_WARNING: + send_event(2, f"Memory at {mem_percent:.0f}% ({used_gb:.1f}/{total_gb:.1f} GB)") + alert_active = True + else: + print(f"[OK] Memory: {mem_percent:.0f}% ({used_gb:.1f}/{total_gb:.1f} GB)") + if alert_active: + clear_event() + alert_active = False + + time.sleep(CHECK_INTERVAL) + + +if __name__ == "__main__": + main() diff --git a/detectors/network.py b/detectors/network.py new file mode 100644 index 0000000..e5d3e93 --- /dev/null +++ b/detectors/network.py @@ -0,0 +1,115 @@ +""" +Network/Ping Detector +Monitors if hosts are reachable via ping. + +Environment variables: + AGGREGATOR_URL - URL of the aggregator (default: http://localhost:5000) + CHECK_INTERVAL - Seconds between checks (default: 60) + HOSTS - Comma-separated list of hosts to ping (required) + Example: "8.8.8.8,google.com,192.168.1.1" + TIMEOUT - Ping timeout in seconds (default: 5) +""" + +import os +import sys +import time +import platform +import subprocess +import requests + +# Configuration from environment +AGGREGATOR_URL = os.environ.get("AGGREGATOR_URL", "http://localhost:5000") +CHECK_INTERVAL = int(os.environ.get("CHECK_INTERVAL", 60)) +HOSTS = os.environ.get("HOSTS", "") +TIMEOUT = int(os.environ.get("TIMEOUT", 5)) + + +def ping(host): + """Ping a host. 
Returns True if reachable.""" + param = "-n" if platform.system().lower() == "windows" else "-c" + timeout_param = "-w" if platform.system().lower() == "windows" else "-W" + timeout_val = str(TIMEOUT * 1000) if platform.system().lower() == "windows" else str(TIMEOUT) + + try: + result = subprocess.run( + ["ping", param, "1", timeout_param, timeout_val, host], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=TIMEOUT + 2 + ) + return result.returncode == 0 + except subprocess.TimeoutExpired: + return False + except Exception: + return False + + +def send_event(event_id, priority, message): + """Send an event to the aggregator with heartbeat TTL.""" + ttl = CHECK_INTERVAL * 2 + try: + response = requests.post( + f"{AGGREGATOR_URL}/event", + json={"id": event_id, "priority": priority, "message": message, "ttl": ttl}, + timeout=5 + ) + print(f"[EVENT] {event_id}: {message} (priority {priority}, ttl {ttl}s) -> {response.status_code}") + except requests.RequestException as e: + print(f"[ERROR] Failed to send event: {e}") + + +def clear_event(event_id): + """Clear the event from the aggregator.""" + try: + response = requests.post( + f"{AGGREGATOR_URL}/clear", + json={"id": event_id}, + timeout=5 + ) + if response.status_code == 200: + print(f"[CLEAR] {event_id}") + except requests.RequestException as e: + print(f"[ERROR] Failed to clear event: {e}") + + +def main(): + if not HOSTS: + print("ERROR: HOSTS environment variable is required") + print("Example: HOSTS=8.8.8.8,google.com python detectors/network.py") + sys.exit(1) + + hosts = [h.strip() for h in HOSTS.split(",") if h.strip()] + + print(f"Network/Ping Detector started") + print(f" Aggregator: {AGGREGATOR_URL}") + print(f" Interval: {CHECK_INTERVAL}s") + print(f" Timeout: {TIMEOUT}s") + print(f" Monitoring: {', '.join(hosts)}") + print() + + # Track which hosts have active alerts + active_alerts = set() + + while True: + current_alerts = set() + + for host in hosts: + event_id = 
f"ping_{host.replace('.', '_').replace(':', '_')}" + + if ping(host): + print(f"[OK] Host '{host}' is reachable") + else: + send_event(event_id, 1, f"Host '{host}' is unreachable") + current_alerts.add(event_id) + + # Clear alerts for hosts that are now reachable + for event_id in active_alerts - current_alerts: + clear_event(event_id) + + active_alerts = current_alerts + + time.sleep(CHECK_INTERVAL) + + +if __name__ == "__main__": + main() diff --git a/detectors/service.py b/detectors/service.py new file mode 100644 index 0000000..83c2edf --- /dev/null +++ b/detectors/service.py @@ -0,0 +1,108 @@ +""" +Service Health Detector +Monitors if specific processes/services are running. + +Environment variables: + AGGREGATOR_URL - URL of the aggregator (default: http://localhost:5000) + CHECK_INTERVAL - Seconds between checks (default: 30) + SERVICES - Comma-separated list of process names to monitor (required) + Example: "nginx,postgres,redis" +""" + +import os +import sys +import time +import psutil +import requests + +# Configuration from environment +AGGREGATOR_URL = os.environ.get("AGGREGATOR_URL", "http://localhost:5000") +CHECK_INTERVAL = int(os.environ.get("CHECK_INTERVAL", 30)) +SERVICES = os.environ.get("SERVICES", "") + + +def get_running_processes(): + """Get set of running process names.""" + running = set() + for proc in psutil.process_iter(['name']): + try: + name = proc.info['name'] + if name: + # Store both with and without common extensions + running.add(name.lower()) + if name.lower().endswith('.exe'): + running.add(name.lower()[:-4]) + except (psutil.NoSuchProcess, psutil.AccessDenied): + pass + return running + + +def send_event(event_id, priority, message): + """Send an event to the aggregator with heartbeat TTL.""" + ttl = CHECK_INTERVAL * 2 + try: + response = requests.post( + f"{AGGREGATOR_URL}/event", + json={"id": event_id, "priority": priority, "message": message, "ttl": ttl}, + timeout=5 + ) + print(f"[EVENT] {event_id}: {message} (priority 
{priority}, ttl {ttl}s) -> {response.status_code}") + except requests.RequestException as e: + print(f"[ERROR] Failed to send event: {e}") + + +def clear_event(event_id): + """Clear the event from the aggregator.""" + try: + response = requests.post( + f"{AGGREGATOR_URL}/clear", + json={"id": event_id}, + timeout=5 + ) + if response.status_code == 200: + print(f"[CLEAR] {event_id}") + except requests.RequestException as e: + print(f"[ERROR] Failed to clear event: {e}") + + +def main(): + if not SERVICES: + print("ERROR: SERVICES environment variable is required") + print("Example: SERVICES=nginx,postgres,redis python detectors/service.py") + sys.exit(1) + + services = [s.strip().lower() for s in SERVICES.split(",") if s.strip()] + + print(f"Service Health Detector started") + print(f" Aggregator: {AGGREGATOR_URL}") + print(f" Interval: {CHECK_INTERVAL}s") + print(f" Monitoring: {', '.join(services)}") + print() + + # Track which services have active alerts + active_alerts = set() + + while True: + running = get_running_processes() + current_alerts = set() + + for service in services: + event_id = f"service_{service}" + + if service not in running: + send_event(event_id, 1, f"Service '{service}' is not running") + current_alerts.add(event_id) + else: + print(f"[OK] Service '{service}' is running") + + # Clear alerts for services that are now running + for event_id in active_alerts - current_alerts: + clear_event(event_id) + + active_alerts = current_alerts + + time.sleep(CHECK_INTERVAL) + + +if __name__ == "__main__": + main() diff --git a/index.html b/index.html new file mode 100644 index 0000000..c2bf065 --- /dev/null +++ b/index.html @@ -0,0 +1,122 @@ + + + + + + Sentry-Emote + + + +
( ^_^)
+
Loading...
"""
Sentry-Emote
Single entry point for the entire system - aggregator + all detectors.

Usage:
    python sentry.py [--config config.json]
"""

import json
import os
import signal
import subprocess
import sys
import threading
import time
from pathlib import Path

# Configuration
DEFAULT_CONFIG = "config.json"      # resolved relative to this script when not absolute
RESTART_DELAY = 5                   # seconds to wait before restarting a crashed child
AGGREGATOR_STARTUP_TIMEOUT = 10     # seconds to wait for the aggregator HTTP endpoint


class SentryEmote:
    """Supervisor for the Sentry-Emote system.

    Launches the aggregator (a Flask service) plus all enabled detector
    scripts as subprocesses, mirrors their output to our stdout with a
    ``[name]`` prefix, and restarts any child that exits.
    """

    def __init__(self, config_path):
        self.config_path = Path(config_path)
        # Scripts named in config.json are resolved relative to the config file.
        self.base_dir = self.config_path.parent
        self.processes = {}  # name -> {"process": Popen, "config": dict}
        self.running = True
        self.config = None   # populated by load_config()

    def load_config(self):
        """Load configuration from the JSON file and cache it on self.config."""
        with open(self.config_path) as f:
            self.config = json.load(f)
        return self.config

    def start_process(self, name, script, env=None):
        """Start a Python script as a subprocess.

        Args:
            name: Label used to prefix the child's output lines.
            script: Script path relative to the config file's directory.
            env: Optional dict of environment overrides merged over os.environ.

        Returns:
            The Popen object, or None if the script is missing or the launch
            failed.  A daemon thread is started to mirror the child's merged
            stdout/stderr to our stdout.
        """
        script_path = self.base_dir / script

        if not script_path.exists():
            print(f"[{name}] Script not found: {script_path}")
            return None

        # Children inherit our environment plus any per-process overrides.
        proc_env = os.environ.copy()
        if env:
            proc_env.update(env)

        try:
            process = subprocess.Popen(
                # -u: unbuffered child output, so lines arrive promptly.
                [sys.executable, "-u", str(script_path)],
                env=proc_env,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # merge stderr into the same stream
                bufsize=1,                 # line-buffered (text mode)
                universal_newlines=True,
            )
        except OSError as e:
            print(f"[{name}] Failed to start: {e}")
            return None

        # readline() blocks, so output must be pumped on its own thread;
        # the supervisor loop only ever checks liveness via poll().
        threading.Thread(
            target=self.stream_output, args=(name, process), daemon=True
        ).start()

        print(f"[{name}] Started (PID {process.pid})")
        return process

    def wait_for_aggregator(self, url, timeout=AGGREGATOR_STARTUP_TIMEOUT):
        """Poll ``GET {url}/status`` until it answers 200 or *timeout* elapses.

        Returns:
            True once the aggregator responded, False on timeout.
        """
        # Imported lazily: requests is only needed here, and this keeps the
        # module importable (e.g. for --help) without the dependency installed.
        import requests

        print(f"[aggregator] Waiting for service at {url}...")
        # monotonic() is immune to wall-clock adjustments during the wait.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            try:
                response = requests.get(f"{url}/status", timeout=2)
                if response.status_code == 200:
                    print("[aggregator] Service ready")
                    return True
            except requests.RequestException:
                pass  # not up yet; keep polling
            time.sleep(0.5)
        print("[aggregator] Timeout waiting for service")
        return False

    def stream_output(self, name, process):
        """Mirror a child's output to stdout until EOF.

        readline() blocks until the child emits a line (or exits), so this
        runs on a per-process daemon thread started by start_process().
        """
        if process.stdout:
            for line in iter(process.stdout.readline, ""):
                print(f"[{name}] {line.rstrip()}")

    def start_aggregator(self):
        """Start the aggregator service and wait until it answers HTTP.

        Returns:
            True if the aggregator launched and became reachable.
        """
        agg_config = self.config.get("aggregator", {})
        script = agg_config.get("script", "aggregator.py")
        env = agg_config.get("env", {})

        process = self.start_process("aggregator", script, env)
        if not process:
            return False

        self.processes["aggregator"] = {
            "process": process,
            "config": {"name": "aggregator", "script": script, "env": env},
        }
        url = self.config.get("aggregator_url", "http://localhost:5000")
        return self.wait_for_aggregator(url)

    def start_detectors(self):
        """Start every detector whose config has ``enabled`` true (the default)."""
        url = self.config.get("aggregator_url", "http://localhost:5000")

        for detector in self.config.get("detectors", []):
            if not detector.get("enabled", True):
                continue

            name = detector["name"]
            # Every detector gets the aggregator URL; per-detector env wins.
            env = {"AGGREGATOR_URL": url}
            env.update(detector.get("env", {}))

            process = self.start_process(name, detector["script"], env)
            if process:
                self.processes[name] = {
                    "process": process,
                    "config": detector,
                }

    def check_processes(self):
        """Restart any child that has exited.

        Output streaming is handled by per-process daemon threads, so this
        only checks liveness.  NOTE: the RESTART_DELAY sleep pauses the whole
        supervisor loop — acceptable for this small process count.
        """
        for name, info in list(self.processes.items()):
            process = info["process"]

            if process.poll() is None:
                continue  # still running

            print(f"[{name}] Exited with code {process.returncode}, restarting in {RESTART_DELAY}s...")
            time.sleep(RESTART_DELAY)

            if not self.running:
                continue  # shutting down; don't resurrect children

            config = info["config"]
            env = {"AGGREGATOR_URL": self.config.get("aggregator_url", "http://localhost:5000")}
            env.update(config.get("env", {}))

            new_process = self.start_process(name, config["script"], env)
            if new_process:
                self.processes[name]["process"] = new_process

    def _terminate(self, name, process, timeout=5):
        """Politely terminate one child, escalating to kill() after *timeout*s."""
        if process.poll() is not None:
            return  # already gone
        print(f"[{name}] Stopping...")
        process.terminate()
        try:
            process.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            process.kill()

    def stop_all(self):
        """Stop all processes — detectors first, then the aggregator.

        Ordering matters: detectors may POST clear events to the aggregator
        on the way down, so it must outlive them.  Safe to call repeatedly.
        """
        self.running = False
        print("\nShutting down Sentry-Emote...")

        for name, info in list(self.processes.items()):
            if name != "aggregator":
                self._terminate(name, info["process"])

        aggregator = self.processes.get("aggregator")
        if aggregator:
            self._terminate("aggregator", aggregator["process"])

        print("Sentry-Emote stopped.")

    def run(self):
        """Main entry: load config, start everything, then supervise forever."""
        self.load_config()

        print("=" * 50)
        print("  Sentry-Emote")
        print("=" * 50)
        print(f"Config: {self.config_path}")
        print(f"Aggregator URL: {self.config.get('aggregator_url')}")
        print()

        detectors = self.config.get("detectors", [])
        enabled = [d["name"] for d in detectors if d.get("enabled", True)]
        disabled = [d["name"] for d in detectors if not d.get("enabled", True)]

        print(f"Detectors enabled: {', '.join(enabled) or 'none'}")
        if disabled:
            print(f"Detectors disabled: {', '.join(disabled)}")
        print()

        # Aggregator must be up before detectors start POSTing events to it.
        if not self.start_aggregator():
            print("Failed to start aggregator, exiting.")
            self.stop_all()
            return

        # Give it a moment to finish initializing before the first events.
        time.sleep(1)

        self.start_detectors()

        print()
        print("=" * 50)
        print(f"  UI available at: {self.config.get('aggregator_url')}")
        print("=" * 50)
        print()

        try:
            while self.running:
                self.check_processes()
                time.sleep(1)
        except KeyboardInterrupt:
            pass
        finally:
            self.stop_all()


def main():
    """Parse argv, resolve the config path, install signal handlers, and run."""
    config_path = DEFAULT_CONFIG
    if len(sys.argv) > 1:
        if sys.argv[1] in ("-h", "--help"):
            print(__doc__)
            sys.exit(0)
        elif sys.argv[1] == "--config":
            if len(sys.argv) > 2:
                config_path = sys.argv[2]
            else:
                # Previously a bare --config was misread as the path itself.
                print("Error: --config requires a path argument")
                sys.exit(1)
        else:
            config_path = sys.argv[1]

    # Relative paths resolve next to this script, not the current directory.
    config_path = Path(config_path)
    if not config_path.is_absolute():
        config_path = Path(__file__).parent / config_path

    if not config_path.exists():
        print(f"Config file not found: {config_path}")
        sys.exit(1)

    sentry = SentryEmote(config_path)

    def signal_handler(sig, frame):
        sentry.stop_all()
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    sentry.run()


if __name__ == "__main__":
    main()