Initial commit: Sentry-Emote system monitor

- Aggregator: Flask-based event broker with priority queue
- Frontend: OLED-optimized UI with animations
- Detectors: disk, cpu, memory, service, network
- Unified entry point (sentry.py) with process management
- Heartbeat TTL system for auto-clearing stale events

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-02 21:04:02 -06:00
commit 11896919e4
13 changed files with 1405 additions and 0 deletions

108
detectors/service.py Normal file
View File

@@ -0,0 +1,108 @@
"""
Service Health Detector
Monitors whether specific processes/services are running.

Environment variables:
    AGGREGATOR_URL  - URL of the aggregator (default: http://localhost:5000)
    CHECK_INTERVAL  - Seconds between checks (default: 30)
    SERVICES        - Comma-separated list of process names to monitor (required)
                      Example: "nginx,postgres,redis"
"""
import os
import sys
import time
import psutil
import requests
# Runtime settings, read once from the environment at import time.
# Base URL of the aggregator that receives event/clear requests.
AGGREGATOR_URL = os.getenv("AGGREGATOR_URL", "http://localhost:5000")
# Seconds to sleep between consecutive service checks.
CHECK_INTERVAL = int(os.getenv("CHECK_INTERVAL", "30"))
# Raw comma-separated list of process names; empty when unset.
SERVICES = os.getenv("SERVICES", "")
def get_running_processes():
    """Collect the lowercased names of the processes psutil can enumerate.

    A name ending in ``.exe`` is recorded twice, once as-is and once with
    the suffix removed, so a lookup matches either spelling.

    Returns:
        A set of lowercased process-name strings.
    """
    names = set()
    for process in psutil.process_iter(['name']):
        try:
            raw_name = process.info['name']
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # Process vanished or is not inspectable; skip it.
            continue
        if not raw_name:
            continue
        lowered = raw_name.lower()
        names.add(lowered)
        if lowered.endswith('.exe'):
            # Also register the name without the 4-character ".exe" suffix.
            names.add(lowered[:-4])
    return names
def send_event(event_id, priority, message):
    """Post an event to the aggregator's ``/event`` endpoint.

    The payload carries a TTL of twice ``CHECK_INTERVAL`` seconds. The
    outcome (HTTP status or request error) is printed; nothing is returned
    and request failures are not raised to the caller.

    Args:
        event_id: Identifier sent as the event's ``id``.
        priority: Priority value sent with the event.
        message: Human-readable description sent with the event.
    """
    ttl = 2 * CHECK_INTERVAL
    payload = {"id": event_id, "priority": priority, "message": message, "ttl": ttl}
    try:
        response = requests.post(f"{AGGREGATOR_URL}/event", json=payload, timeout=5)
    except requests.RequestException as e:
        print(f"[ERROR] Failed to send event: {e}")
        return
    print(f"[EVENT] {event_id}: {message} (priority {priority}, ttl {ttl}s) -> {response.status_code}")
def clear_event(event_id):
    """Ask the aggregator's ``/clear`` endpoint to drop an event.

    A confirmation line is printed only when the aggregator answers with
    HTTP 200; request errors are printed and not raised to the caller.

    Args:
        event_id: Identifier of the event to clear.
    """
    payload = {"id": event_id}
    try:
        response = requests.post(f"{AGGREGATOR_URL}/clear", json=payload, timeout=5)
    except requests.RequestException as e:
        print(f"[ERROR] Failed to clear event: {e}")
        return
    if response.status_code == 200:
        print(f"[CLEAR] {event_id}")
def _parse_services(raw):
    """Split a comma-separated string into unique, lowercased service names."""
    names = (part.strip().lower() for part in raw.split(","))
    # dict.fromkeys drops duplicates while keeping the configured order.
    return list(dict.fromkeys(name for name in names if name))


def main():
    """Run the service check loop until the process is terminated.

    Each cycle compares the configured service names against the running
    process names, sends a priority-1 event for every service that is not
    running, and clears events for services that were alerting in the
    previous cycle but are running now.

    Exits with status 1 when SERVICES contains no usable name. This covers
    an unset variable as well as values made only of commas/whitespace,
    which would otherwise leave the detector looping with nothing to check.
    """
    services = _parse_services(SERVICES)
    if not services:
        print("ERROR: SERVICES environment variable is required")
        print("Example: SERVICES=nginx,postgres,redis python detectors/service.py")
        sys.exit(1)

    print("Service Health Detector started")
    print(f"  Aggregator: {AGGREGATOR_URL}")
    print(f"  Interval: {CHECK_INTERVAL}s")
    print(f"  Monitoring: {', '.join(services)}")
    print()

    # Event ids that were alerting at the end of the previous cycle.
    active_alerts = set()

    while True:
        running = get_running_processes()
        current_alerts = set()

        for service in services:
            event_id = f"service_{service}"
            if service in running:
                print(f"[OK] Service '{service}' is running")
                continue
            # Re-sent every cycle so the event's TTL keeps being refreshed.
            send_event(event_id, 1, f"Service '{service}' is not running")
            current_alerts.add(event_id)

        # Clear alerts for services that are now running.
        for event_id in active_alerts - current_alerts:
            clear_event(event_id)

        active_alerts = current_alerts
        time.sleep(CHECK_INTERVAL)


if __name__ == "__main__":
    main()