- Aggregator: Flask-based event broker with priority queue - Frontend: OLED-optimized UI with animations - Detectors: disk, cpu, memory, service, network - Unified entry point (sentry.py) with process management - Heartbeat TTL system for auto-clearing stale events Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
116 lines
3.5 KiB
Python
116 lines
3.5 KiB
Python
"""
|
|
Network/Ping Detector
|
|
Monitors if hosts are reachable via ping.
|
|
|
|
Environment variables:
|
|
AGGREGATOR_URL - URL of the aggregator (default: http://localhost:5000)
|
|
CHECK_INTERVAL - Seconds between checks (default: 60)
|
|
HOSTS - Comma-separated list of hosts to ping (required)
|
|
Example: "8.8.8.8,google.com,192.168.1.1"
|
|
TIMEOUT - Ping timeout in seconds (default: 5)
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import platform
|
|
import subprocess
|
|
import requests
|
|
|
|
# Configuration from environment


def _env_int(name, default):
    """Return environment variable *name* as a positive integer.

    Falls back to *default* when the variable is unset or blank, and falls
    back with a printed warning when it is not an integer or is less than 1.
    This keeps a typo in the environment from crashing the detector at import
    time or from producing a zero/negative interval or timeout.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        return default
    try:
        value = int(raw)
    except ValueError:
        print(f"[WARN] {name}={raw!r} is not an integer; using default {default}")
        return default
    if value < 1:
        print(f"[WARN] {name}={value} must be >= 1; using default {default}")
        return default
    return value


AGGREGATOR_URL = os.environ.get("AGGREGATOR_URL", "http://localhost:5000")
CHECK_INTERVAL = _env_int("CHECK_INTERVAL", 60)  # seconds between check rounds
HOSTS = os.environ.get("HOSTS", "")  # raw comma-separated host list
TIMEOUT = _env_int("TIMEOUT", 5)  # per-ping timeout in seconds
|
|
|
|
|
|
def ping(host):
    """Ping a host once. Returns True if reachable.

    Runs the system ``ping`` command with a single echo request and the
    configured TIMEOUT, discarding its output.

    Args:
        host: Hostname or IP address to ping.

    Returns:
        True if ``ping`` exited with status 0. False if it exited non-zero,
        exceeded the subprocess timeout, or could not be run at all.
    """
    system = platform.system().lower()
    if system == "windows":
        # Windows: -n is the echo count, -w the reply timeout in milliseconds.
        command = ["ping", "-n", "1", "-w", str(TIMEOUT * 1000), host]
    elif system in ("darwin", "freebsd"):
        # macOS/FreeBSD: -W is the reply wait time in *milliseconds*, so
        # passing plain seconds here would shrink a 5 s timeout to 5 ms.
        command = ["ping", "-c", "1", "-W", str(TIMEOUT * 1000), host]
    else:
        # Linux (iputils): -W is the reply timeout in seconds.
        command = ["ping", "-c", "1", "-W", str(TIMEOUT), host]

    try:
        result = subprocess.run(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            # Hard upper bound in case ping itself ignores its timeout flag.
            timeout=TIMEOUT + 2,
        )
    except subprocess.TimeoutExpired:
        return False
    except Exception as exc:
        # Deliberately best-effort: a failure to run ping (e.g. binary not
        # installed) is still reported as "unreachable", but it is logged so
        # it can be told apart from a host that is genuinely down.
        print(f"[ERROR] Could not run ping for '{host}': {exc}")
        return False
    return result.returncode == 0
|
|
|
|
|
|
def send_event(event_id, priority, message):
    """Post an event to the aggregator together with a heartbeat TTL.

    The TTL sent with the event is twice CHECK_INTERVAL. The outcome is
    printed: the HTTP status code on any response, or the error if the
    request itself failed.

    Args:
        event_id: Identifier sent as the event's "id".
        priority: Value sent as the event's "priority".
        message: Text sent as the event's "message".
    """
    ttl = 2 * CHECK_INTERVAL
    endpoint = f"{AGGREGATOR_URL}/event"
    payload = {"id": event_id, "priority": priority, "message": message, "ttl": ttl}

    try:
        response = requests.post(endpoint, json=payload, timeout=5)
    except requests.RequestException as e:
        print(f"[ERROR] Failed to send event: {e}")
        return

    print(f"[EVENT] {event_id}: {message} (priority {priority}, ttl {ttl}s) -> {response.status_code}")
|
|
|
|
|
|
def clear_event(event_id):
    """Clear the event from the aggregator.

    Posts the event id to the aggregator's /clear endpoint and prints the
    outcome. A non-200 response is reported as a warning instead of being
    dropped silently, so a clear that did not take effect is visible in the
    detector's output.

    Args:
        event_id: Identifier of the event to clear.
    """
    try:
        response = requests.post(
            f"{AGGREGATOR_URL}/clear",
            json={"id": event_id},
            timeout=5,
        )
    except requests.RequestException as e:
        print(f"[ERROR] Failed to clear event: {e}")
        return

    if response.status_code == 200:
        print(f"[CLEAR] {event_id}")
    else:
        print(f"[WARN] Clear for {event_id} returned HTTP {response.status_code}")
|
|
|
|
|
|
def main():
    """Run the ping detector loop.

    Parses HOSTS, prints the effective configuration, then loops forever:
    each round pings every host, sends an event for each unreachable one,
    clears events for hosts that were unreachable in the previous round but
    are reachable now, and sleeps for CHECK_INTERVAL seconds.

    Exits with status 1 if HOSTS does not name at least one host.
    """
    # Parse before validating: a value such as "," or " " is a non-empty
    # string but names no hosts. dict.fromkeys drops duplicate entries while
    # keeping the configured order, so no host is pinged twice per round.
    hosts = list(dict.fromkeys(h.strip() for h in HOSTS.split(",") if h.strip()))
    if not hosts:
        print("ERROR: HOSTS environment variable is required")
        print("Example: HOSTS=8.8.8.8,google.com python detectors/network.py")
        sys.exit(1)

    print("Network/Ping Detector started")
    print(f"  Aggregator: {AGGREGATOR_URL}")
    print(f"  Interval: {CHECK_INTERVAL}s")
    print(f"  Timeout: {TIMEOUT}s")
    print(f"  Monitoring: {', '.join(hosts)}")
    print()

    # Event ids that were alerting at the end of the previous round
    active_alerts = set()

    while True:
        current_alerts = set()

        for host in hosts:
            # Dots and colons in the host are replaced with underscores
            event_id = f"ping_{host.replace('.', '_').replace(':', '_')}"

            if ping(host):
                print(f"[OK] Host '{host}' is reachable")
            else:
                # Sent again on every failing round, not only the first one
                send_event(event_id, 1, f"Host '{host}' is unreachable")
                current_alerts.add(event_id)

        # Clear alerts for hosts that are now reachable
        for event_id in active_alerts - current_alerts:
            clear_event(event_id)

        active_alerts = current_alerts

        time.sleep(CHECK_INTERVAL)
|
|
|
|
|
|
# Script entry point: start the detector loop only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|