feat: Implemented data storage and baselining

2025-08-18 12:49:51 -05:00
parent 3d74bf13f5
commit 9ac382e23e
3 changed files with 30 additions and 50 deletions

View File

@@ -55,9 +55,10 @@ The project will be composed of the following files:
- The loop will execute the data collection, analysis, and alerting steps periodically.
- The frequency of the monitoring loop will be configurable.
## 4. Data Storage and Baselining
- **4.1. Data Storage**: The agent will store historical monitoring data in a JSON file (`monitoring_data.json`).
- **4.2. Baselining**: The agent will calculate baseline averages for key metrics (e.g., RTT, packet loss) from the stored historical data. This baseline will be used by the LLM to improve anomaly detection accuracy.
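For illustration, a single record in `monitoring_data.json` might look like the sketch below. The field names are assumptions inferred from what the baselining code reads (`system_logs['timestamp']`, `network_metrics['round_trip_ms_avg']`, `network_metrics['packet_loss_percent']`); the real agent may record additional keys.

# Hypothetical example record (field names assumed for illustration, not taken from the agent's actual output).
example_record = {
    "system_logs": {
        "timestamp": "2025-08-18T12:49:51Z"  # ISO 8601; the trailing 'Z' is stripped before parsing
    },
    "network_metrics": {
        "packets_transmitted": 10,
        "packets_received": 9,
        "packet_loss_percent": 10.0,
        "round_trip_ms_avg": 23.4
    }
}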
## 5. Technical Requirements

View File

@@ -1,59 +1,36 @@
# Data Storage for the LLM-Powered Monitoring Agent
import json
import os
from datetime import datetime, timedelta

DATA_FILE = 'monitoring_data.json'


def load_data():
    """Loads all historical monitoring data from the JSON file."""
    if os.path.exists(DATA_FILE):
        with open(DATA_FILE, 'r') as f:
            try:
                return json.load(f)
            except json.JSONDecodeError:
                return []
    return []


def store_data(new_data):
    """Appends a new monitoring record to the JSON file."""
    data = load_data()
    data.append(new_data)
    with open(DATA_FILE, 'w') as f:
        json.dump(data, f, indent=4)


def calculate_baselines():
    """Calculates baseline averages for key network metrics (RTT, packet loss)."""
    data = load_data()
    if not data:
        return {}

    # For simplicity, average only the last 24 hours of data.
    # More complex logic can be added here.
    cutoff = datetime.now() - timedelta(hours=24)
    recent_data = [
        d for d in data
        if d.get('network_metrics')
        and datetime.fromisoformat(d['system_logs']['timestamp'].replace('Z', '')) > cutoff
    ]
    if not recent_data:
        return {}

    baseline_metrics = {
        'avg_rtt': sum(d['network_metrics']['round_trip_ms_avg'] for d in recent_data) / len(recent_data),
        'packet_loss': sum(d['network_metrics']['packet_loss_percent'] for d in recent_data) / len(recent_data),
    }
    return baseline_metrics
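A minimal usage sketch of the module, assuming it is saved as `data_storage.py` (as implied by the `data_storage.` calls in the main script) and that records follow the shape shown earlier; the metric values are made up.

# Minimal usage sketch; record values are hypothetical.
import data_storage
from datetime import datetime

record = {
    "system_logs": {"timestamp": datetime.now().isoformat()},  # parsed by calculate_baselines()
    "network_metrics": {
        "round_trip_ms_avg": 23.4,
        "packet_loss_percent": 10.0,
    },
}
data_storage.store_data(record)            # appends the record to monitoring_data.json
print(data_storage.calculate_baselines())  # e.g. {'avg_rtt': 23.4, 'packet_loss': 10.0}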

View File

@@ -145,6 +145,8 @@ if __name__ == "__main__":
        "network_metrics": network_metrics
    }
    data_storage.store_data(combined_data)
    llm_response = analyze_data_with_llm(combined_data, data_storage.calculate_baselines())
    if llm_response and llm_response != "OK":
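The body of `analyze_data_with_llm` is not part of this diff, so the following is only a hypothetical sketch of how the baselines returned by `calculate_baselines()` could be folded into the prompt. The helper name `build_prompt` and the wording are assumptions, not the project's actual implementation.

import json

# Hypothetical sketch only: injecting baseline averages into the LLM prompt.
def build_prompt(combined_data, baselines):
    if baselines:
        baseline_text = (f"Baseline over the last 24 hours: "
                         f"avg RTT {baselines['avg_rtt']:.1f} ms, "
                         f"avg packet loss {baselines['packet_loss']:.1f}%.")
    else:
        baseline_text = "No baseline available yet."
    return ("You are a network monitoring assistant. Compare the current sample "
            "against the baseline and reply 'OK' if nothing looks anomalous.\n"
            f"{baseline_text}\n"
            f"Current sample:\n{json.dumps(combined_data, indent=2)}")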