- Extract shared send_event/clear_event into detectors/base.py, removing ~150 lines of duplication across all 6 detectors - Fix default aggregator URL from port 5000 to 5100 in all detectors - Standardize cpu.py and memory.py to use active_alerts set pattern - Fix immediate emote rotation on startup (last_emote_change = time.time()) - Extract magic numbers to named constants in aggregator - Protect write_status() with try/except OSError - Fix notify event ID collision with monotonic counter - Replace blocking stream_output() with background daemon threads in kao.py Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
63 lines
2.1 KiB
Python
63 lines
2.1 KiB
Python
"""
Memory Usage Detector

Monitors RAM usage and reports to the aggregator when thresholds are exceeded.

Environment variables:
    AGGREGATOR_URL - URL of the aggregator (default: http://localhost:5100)
    CHECK_INTERVAL - Seconds between checks (default: 30)
    THRESHOLD_CRITICAL - Percent usage for critical alert (default: 95)
    THRESHOLD_WARNING - Percent usage for warning alert (default: 85)
"""
|
|
|
|
import os
|
|
import time
|
|
import psutil
|
|
|
|
from detectors.base import DEFAULT_AGGREGATOR_URL, send_event, clear_event
|
|
|
|
# Runtime settings. Each one may be overridden through an environment
# variable of the same name; the second argument is the fallback that is
# used when the variable is not set.
AGGREGATOR_URL = os.getenv("AGGREGATOR_URL", DEFAULT_AGGREGATOR_URL)
CHECK_INTERVAL = int(os.getenv("CHECK_INTERVAL", 30))  # seconds between checks
THRESHOLD_CRITICAL = int(os.getenv("THRESHOLD_CRITICAL", 95))  # percent used
THRESHOLD_WARNING = int(os.getenv("THRESHOLD_WARNING", 85))  # percent used

# The one event identifier this detector passes to send_event/clear_event.
EVENT_ID = "memory_usage"
|
|
|
|
|
|
def main():
    """Poll RAM usage forever and report threshold breaches to the aggregator.

    Prints the effective configuration once, then loops without returning:
    each pass reads ``psutil.virtual_memory()``, compares the usage percentage
    with ``THRESHOLD_CRITICAL`` and ``THRESHOLD_WARNING``, and either calls
    ``send_event`` or prints an ``[OK]`` line. An event that was raised on the
    previous pass but not on the current one is cleared via ``clear_event``.
    The loop sleeps ``CHECK_INTERVAL`` seconds between passes.
    """
    print(f"Memory Usage Detector started")
    print(f" Aggregator: {AGGREGATOR_URL}")
    print(f" Interval: {CHECK_INTERVAL}s")
    print(f" Thresholds: Warning={THRESHOLD_WARNING}%, Critical={THRESHOLD_CRITICAL}%")
    print()

    # Event ids that were raised on the previous pass.
    previously_raised = set()

    while True:
        snapshot = psutil.virtual_memory()
        pct = snapshot.percent
        # Convert byte counts to GiB for the human-readable message.
        used = snapshot.used / (1024 ** 3)
        total = snapshot.total / (1024 ** 3)

        # Critical is tested first so that it wins when both thresholds are
        # exceeded. The value is forwarded as send_event's third argument
        # (presumably a priority/severity level -- confirm in detectors.base).
        if pct >= THRESHOLD_CRITICAL:
            level = 1
        elif pct >= THRESHOLD_WARNING:
            level = 2
        else:
            level = None

        raised_now = set()
        if level is None:
            print(f"[OK] Memory: {pct:.0f}% ({used:.1f}/{total:.1f} GB)")
        else:
            alert_text = f"Memory at {pct:.0f}% ({used:.1f}/{total:.1f} GB)"
            send_event(AGGREGATOR_URL, EVENT_ID, level, alert_text, CHECK_INTERVAL)
            raised_now.add(EVENT_ID)

        # Clear whatever was raised last pass but is no longer raised.
        for stale_id in previously_raised - raised_now:
            clear_event(AGGREGATOR_URL, stale_id)

        previously_raised = raised_now
        time.sleep(CHECK_INTERVAL)
|
|
|
|
|
|
# Start the detector loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|