Bump to v1.5.0: deduplicate detectors, fix aggregator bugs, fix blocking I/O
- Extract shared send_event/clear_event into detectors/base.py, removing ~150 lines of duplication across all 6 detectors
- Fix default aggregator URL from port 5000 to 5100 in all detectors
- Standardize cpu.py and memory.py to use active_alerts set pattern
- Fix immediate emote rotation on startup (last_emote_change = time.time())
- Extract magic numbers to named constants in aggregator
- Protect write_status() with try/except OSError
- Fix notify event ID collision with monotonic counter
- Replace blocking stream_output() with background daemon threads in kao.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,7 +3,7 @@ Disk Space Detector
|
||||
Monitors all drives and reports to the aggregator when thresholds are exceeded.
|
||||
|
||||
Environment variables:
|
||||
AGGREGATOR_URL - URL of the aggregator (default: http://localhost:5000)
|
||||
AGGREGATOR_URL - URL of the aggregator (default: http://localhost:5100)
|
||||
CHECK_INTERVAL - Seconds between checks (default: 300)
|
||||
THRESHOLD_CRITICAL - Percent usage for critical alert (default: 95)
|
||||
THRESHOLD_WARNING - Percent usage for warning alert (default: 85)
|
||||
@@ -12,10 +12,11 @@ Environment variables:
|
||||
import os
|
||||
import time
|
||||
import shutil
|
||||
import requests
|
||||
|
||||
from detectors.base import DEFAULT_AGGREGATOR_URL, send_event, clear_event
|
||||
|
||||
# Configuration from environment
|
||||
AGGREGATOR_URL = os.environ.get("AGGREGATOR_URL", "http://localhost:5000")
|
||||
AGGREGATOR_URL = os.environ.get("AGGREGATOR_URL", DEFAULT_AGGREGATOR_URL)
|
||||
CHECK_INTERVAL = int(os.environ.get("CHECK_INTERVAL", 300))
|
||||
THRESHOLD_CRITICAL = int(os.environ.get("THRESHOLD_CRITICAL", 95))
|
||||
THRESHOLD_WARNING = int(os.environ.get("THRESHOLD_WARNING", 85))
|
||||
@@ -85,34 +86,6 @@ def check_disk(drive):
|
||||
return None, None, None
|
||||
|
||||
|
||||
def send_event(event_id, priority, message):
|
||||
"""Send an event to the aggregator with heartbeat TTL."""
|
||||
ttl = CHECK_INTERVAL * 2 # Event expires if not refreshed
|
||||
try:
|
||||
response = requests.post(
|
||||
f"{AGGREGATOR_URL}/event",
|
||||
json={"id": event_id, "priority": priority, "message": message, "ttl": ttl},
|
||||
timeout=5
|
||||
)
|
||||
print(f"[EVENT] {event_id}: {message} (priority {priority}, ttl {ttl}s) -> {response.status_code}")
|
||||
except requests.RequestException as e:
|
||||
print(f"[ERROR] Failed to send event: {e}")
|
||||
|
||||
|
||||
def clear_event(event_id):
|
||||
"""Clear an event from the aggregator."""
|
||||
try:
|
||||
response = requests.post(
|
||||
f"{AGGREGATOR_URL}/clear",
|
||||
json={"id": event_id},
|
||||
timeout=5
|
||||
)
|
||||
if response.status_code == 200:
|
||||
print(f"[CLEAR] {event_id}")
|
||||
except requests.RequestException as e:
|
||||
print(f"[ERROR] Failed to clear event: {e}")
|
||||
|
||||
|
||||
def main():
|
||||
print(f"Disk Space Detector started")
|
||||
print(f" Aggregator: {AGGREGATOR_URL}")
|
||||
@@ -139,18 +112,18 @@ def main():
|
||||
|
||||
if percent >= THRESHOLD_CRITICAL:
|
||||
message = f"{drive} at {percent:.0f}% ({used_gb:.1f}/{total_gb:.1f} GB)"
|
||||
send_event(event_id, 1, message)
|
||||
send_event(AGGREGATOR_URL, event_id, 1, message, CHECK_INTERVAL)
|
||||
current_alerts.add(event_id)
|
||||
elif percent >= THRESHOLD_WARNING:
|
||||
message = f"{drive} at {percent:.0f}% ({used_gb:.1f}/{total_gb:.1f} GB)"
|
||||
send_event(event_id, 2, message)
|
||||
send_event(AGGREGATOR_URL, event_id, 2, message, CHECK_INTERVAL)
|
||||
current_alerts.add(event_id)
|
||||
else:
|
||||
print(f"[OK] {drive}: {percent:.0f}%")
|
||||
|
||||
# Clear alerts that are no longer active
|
||||
for event_id in active_alerts - current_alerts:
|
||||
clear_event(event_id)
|
||||
clear_event(AGGREGATOR_URL, event_id)
|
||||
|
||||
active_alerts = current_alerts
|
||||
|
||||
|
||||
Reference in New Issue
Block a user