feat: Implemented data storage and baselining

2025-08-18 12:49:51 -05:00
parent 3d74bf13f5
commit 9ac382e23e
3 changed files with 30 additions and 50 deletions

View File

@@ -55,9 +55,10 @@ The project will be composed of the following files:
 - The loop will execute the data collection, analysis, and alerting steps periodically.
 - The frequency of the monitoring loop will be configurable.
-## 4. Future Features
-- **4.1. Data Storage and Averaging**: Store historical system data to calculate baseline averages for more accurate anomaly detection.
+## 4. Data Storage and Baselining
+- **4.1. Data Storage**: The agent will store historical monitoring data in a JSON file (`monitoring_data.json`).
+- **4.2. Baselining**: The agent will calculate baseline averages for key metrics (e.g., RTT, packet loss) from the stored historical data. This baseline will be used by the LLM to improve anomaly detection accuracy.
 ## 5. Technical Requirements
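
For reference, a minimal sketch of the record shape the new storage and baselining code assumes each stored entry to have. Only `system_logs.timestamp` and the two averaged `network_metrics` fields are actually read back by the baselining code; the concrete values here are illustrative, not from this commit.

```python
# One record as appended to monitoring_data.json. The timestamp must be
# ISO 8601 (a trailing 'Z' is tolerated; calculate_baselines strips it).
# All values below are illustrative placeholders.
example_record = {
    "system_logs": {
        "timestamp": "2025-08-18T12:49:51Z",  # read by calculate_baselines
    },
    "network_metrics": {
        "round_trip_ms_avg": 23.5,    # averaged into 'avg_rtt'
        "packet_loss_percent": 0.0,   # averaged into 'packet_loss'
        "packets_transmitted": 10,    # stored, but unused by the new baselines
        "packets_received": 10,
    },
}
```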

View File

@@ -1,59 +1,36 @@
-# Data Storage for the LLM-Powered Monitoring Agent
 import json
+import os
+from datetime import datetime, timedelta
 
-DATA_FILE = "historical_data.json"
+DATA_FILE = 'monitoring_data.json'
 
-def store_data(data):
-    """Stores data in a JSON file."""
-    try:
-        with open(DATA_FILE, 'r+') as f:
-            try:
-                historical_data = json.load(f)
-            except json.JSONDecodeError:
-                historical_data = []
-            historical_data.append(data)
-            f.seek(0)
-            json.dump(historical_data, f, indent=2)
-    except FileNotFoundError:
-        with open(DATA_FILE, 'w') as f:
-            json.dump([data], f, indent=2)
-
-def get_historical_data():
-    """Retrieves historical data from the JSON file."""
-    try:
+def load_data():
+    if os.path.exists(DATA_FILE):
         with open(DATA_FILE, 'r') as f:
             return json.load(f)
-    except (FileNotFoundError, json.JSONDecodeError):
-        return []
+    return []
+
+def store_data(new_data):
+    data = load_data()
+    data.append(new_data)
+    with open(DATA_FILE, 'w') as f:
+        json.dump(data, f, indent=4)
 
 def calculate_baselines():
-    """Calculates baseline averages for network metrics."""
-    historical_data = get_historical_data()
-    if not historical_data:
-        return None
-
-    # Calculate average network metrics
-    total_packets_transmitted = 0
-    total_packets_received = 0
-    total_packet_loss_percent = 0
-    total_round_trip_ms_avg = 0
-    count = 0
-
-    for data in historical_data:
-        if "network_metrics" in data and data["network_metrics"]:
-            total_packets_transmitted += data["network_metrics"].get("packets_transmitted", 0) or 0
-            total_packets_received += data["network_metrics"].get("packets_received", 0) or 0
-            total_packet_loss_percent += data["network_metrics"].get("packet_loss_percent", 0) or 0
-            total_round_trip_ms_avg += data["network_metrics"].get("round_trip_ms_avg", 0) or 0
-            count += 1
-
-    if count == 0:
-        return None
-
-    return {
-        "avg_packets_transmitted": total_packets_transmitted / count,
-        "avg_packets_received": total_packets_received / count,
-        "avg_packet_loss_percent": total_packet_loss_percent / count,
-        "avg_round_trip_ms_avg": total_round_trip_ms_avg / count,
-    }
+    data = load_data()
+    if not data:
+        return {}
+
+    # For simplicity, we'll average the last 24 hours of data
+    # More complex logic can be added here
+    recent_data = [d for d in data if datetime.fromisoformat(d['system_logs']['timestamp'].replace('Z', '')) > datetime.now() - timedelta(hours=24)]
+
+    if not recent_data:
+        return {}
+
+    baseline_metrics = {
+        'avg_rtt': sum(d['network_metrics']['round_trip_ms_avg'] for d in recent_data) / len(recent_data),
+        'packet_loss': sum(d['network_metrics']['packet_loss_percent'] for d in recent_data) / len(recent_data),
+    }
+    return baseline_metrics
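
A quick usage sketch of the rewritten module, assuming it lives in `data_storage.py` as the `data_storage.store_data(...)` call in the main agent below suggests. The record contents are illustrative.

```python
from datetime import datetime

import data_storage

# Store one snapshot; store_data reads the whole file, appends, and rewrites it.
data_storage.store_data({
    "system_logs": {"timestamp": datetime.now().isoformat()},
    "network_metrics": {"round_trip_ms_avg": 21.0, "packet_loss_percent": 0.0},
})

# With at least one record from the last 24 hours this returns averages,
# e.g. {'avg_rtt': 21.0, 'packet_loss': 0.0}; with no recent records, {}.
print(data_storage.calculate_baselines())
```

Note the read-modify-write design: every `store_data` call rereads and rewrites the whole file, and the history grows without bound. That is fine for a lightweight agent but worth revisiting if the data set gets large.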

View File

@@ -145,6 +145,8 @@ if __name__ == "__main__":
"network_metrics": network_metrics "network_metrics": network_metrics
} }
data_storage.store_data(combined_data)
llm_response = analyze_data_with_llm(combined_data, data_storage.calculate_baselines()) llm_response = analyze_data_with_llm(combined_data, data_storage.calculate_baselines())
if llm_response and llm_response != "OK": if llm_response and llm_response != "OK":
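
The design doc says the baseline is handed to the LLM to improve anomaly detection, but this commit only changes the call site, so how `analyze_data_with_llm` uses its second argument is not shown here. Below is a minimal sketch of the kind of prompt construction implied; the helper name `build_prompt` and the prompt wording are assumptions, not part of this commit.

```python
import json

def build_prompt(combined_data, baselines):
    # Hypothetical helper -- not from this commit. It pairs the current
    # metrics with the 24-hour baseline so the model can judge deviation
    # rather than raw values, and asks for the literal "OK" that the
    # main loop checks for.
    return (
        "Current network metrics:\n"
        f"{json.dumps(combined_data['network_metrics'], indent=2)}\n\n"
        f"24-hour baseline averages: {json.dumps(baselines or {})}\n\n"
        "Compare the current metrics against the baseline. Reply with "
        "exactly 'OK' if everything looks normal; otherwise describe "
        "the anomaly."
    )
```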