feat: Implemented data storage and baselining

2025-08-18 12:49:51 -05:00
parent 3d74bf13f5
commit 9ac382e23e
3 changed files with 30 additions and 50 deletions

View File

@@ -55,9 +55,10 @@ The project will be composed of the following files:
 - The loop will execute the data collection, analysis, and alerting steps periodically.
 - The frequency of the monitoring loop will be configurable.
-## 4. Future Features
-- **4.1. Data Storage and Averaging**: Store historical system data to calculate baseline averages for more accurate anomaly detection.
+## 4. Data Storage and Baselining
+- **4.1. Data Storage**: The agent will store historical monitoring data in a JSON file (`monitoring_data.json`).
+- **4.2. Baselining**: The agent will calculate baseline averages for key metrics (e.g., RTT, packet loss) from the stored historical data. This baseline will be used by the LLM to improve anomaly detection accuracy.
 ## 5. Technical Requirements
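
For reference, a minimal sketch of the record shape the new storage and baselining code assumes each stored entry to have. Only `system_logs.timestamp` and the two averaged `network_metrics` fields are actually read back by the baselining code; the concrete values here are illustrative, not from this commit.

```python
# One record as appended to monitoring_data.json. The timestamp must be
# ISO 8601 (a trailing 'Z' is tolerated; calculate_baselines strips it).
# All values below are illustrative placeholders.
example_record = {
    "system_logs": {
        "timestamp": "2025-08-18T12:49:51Z",  # read by calculate_baselines
    },
    "network_metrics": {
        "round_trip_ms_avg": 23.5,    # averaged into 'avg_rtt'
        "packet_loss_percent": 0.0,   # averaged into 'packet_loss'
        "packets_transmitted": 10,    # stored, but unused by the new baselines
        "packets_received": 10,
    },
}
```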

View File

@@ -1,59 +1,36 @@
-# Data Storage for the LLM-Powered Monitoring Agent
 import json
+import os
+from datetime import datetime, timedelta
 
-DATA_FILE = "historical_data.json"
+DATA_FILE = 'monitoring_data.json'
 
-def store_data(data):
-    """Stores data in a JSON file."""
-    try:
-        with open(DATA_FILE, 'r+') as f:
-            try:
-                historical_data = json.load(f)
-            except json.JSONDecodeError:
-                historical_data = []
-            historical_data.append(data)
-            f.seek(0)
-            json.dump(historical_data, f, indent=2)
-    except FileNotFoundError:
-        with open(DATA_FILE, 'w') as f:
-            json.dump([data], f, indent=2)
-
-def get_historical_data():
-    """Retrieves historical data from the JSON file."""
-    try:
+def load_data():
+    if os.path.exists(DATA_FILE):
         with open(DATA_FILE, 'r') as f:
             return json.load(f)
-    except (FileNotFoundError, json.JSONDecodeError):
-        return []
+    return []
+
+def store_data(new_data):
+    data = load_data()
+    data.append(new_data)
+    with open(DATA_FILE, 'w') as f:
+        json.dump(data, f, indent=4)
 
 def calculate_baselines():
-    """Calculates baseline averages for network metrics."""
-    historical_data = get_historical_data()
-    if not historical_data:
-        return None
-
-    # Calculate average network metrics
-    total_packets_transmitted = 0
-    total_packets_received = 0
-    total_packet_loss_percent = 0
-    total_round_trip_ms_avg = 0
-    count = 0
-
-    for data in historical_data:
-        if "network_metrics" in data and data["network_metrics"]:
-            total_packets_transmitted += data["network_metrics"].get("packets_transmitted", 0) or 0
-            total_packets_received += data["network_metrics"].get("packets_received", 0) or 0
-            total_packet_loss_percent += data["network_metrics"].get("packet_loss_percent", 0) or 0
-            total_round_trip_ms_avg += data["network_metrics"].get("round_trip_ms_avg", 0) or 0
-            count += 1
-
-    if count == 0:
-        return None
-
-    return {
-        "avg_packets_transmitted": total_packets_transmitted / count,
-        "avg_packets_received": total_packets_received / count,
-        "avg_packet_loss_percent": total_packet_loss_percent / count,
-        "avg_round_trip_ms_avg": total_round_trip_ms_avg / count,
-    }
+    data = load_data()
+    if not data:
+        return {}
+
+    # For simplicity, we'll average the last 24 hours of data
+    # More complex logic can be added here
+    recent_data = [d for d in data if datetime.fromisoformat(d['system_logs']['timestamp'].replace('Z', '')) > datetime.now() - timedelta(hours=24)]
+
+    if not recent_data:
+        return {}
+
+    baseline_metrics = {
+        'avg_rtt': sum(d['network_metrics']['round_trip_ms_avg'] for d in recent_data) / len(recent_data),
+        'packet_loss': sum(d['network_metrics']['packet_loss_percent'] for d in recent_data) / len(recent_data),
+    }
+    return baseline_metrics
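
A quick usage sketch of the rewritten module, assuming it lives in `data_storage.py` as the `data_storage.store_data(...)` call in the main agent below suggests. The record contents are illustrative.

```python
from datetime import datetime

import data_storage

# Store one snapshot; store_data reads the whole file, appends, and rewrites it.
data_storage.store_data({
    "system_logs": {"timestamp": datetime.now().isoformat()},
    "network_metrics": {"round_trip_ms_avg": 21.0, "packet_loss_percent": 0.0},
})

# With at least one record from the last 24 hours this returns averages,
# e.g. {'avg_rtt': 21.0, 'packet_loss': 0.0}; with no recent records, {}.
print(data_storage.calculate_baselines())
```

Note the read-modify-write design: every `store_data` call rereads and rewrites the whole file, and the history grows without bound. That is fine for a lightweight agent but worth revisiting if the data set gets large.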

View File

@@ -145,6 +145,8 @@ if __name__ == "__main__":
"network_metrics": network_metrics "network_metrics": network_metrics
} }
data_storage.store_data(combined_data)
llm_response = analyze_data_with_llm(combined_data, data_storage.calculate_baselines()) llm_response = analyze_data_with_llm(combined_data, data_storage.calculate_baselines())
if llm_response and llm_response != "OK": if llm_response and llm_response != "OK":
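
The design doc says the baseline is handed to the LLM to improve anomaly detection, but this commit only changes the call site, so how `analyze_data_with_llm` uses its second argument is not shown here. Below is a minimal sketch of the kind of prompt construction implied; the helper name `build_prompt` and the prompt wording are assumptions, not part of this commit.

```python
import json

def build_prompt(combined_data, baselines):
    # Hypothetical helper -- not from this commit. It pairs the current
    # metrics with the 24-hour baseline so the model can judge deviation
    # rather than raw values, and asks for the literal "OK" that the
    # main loop checks for.
    return (
        "Current network metrics:\n"
        f"{json.dumps(combined_data['network_metrics'], indent=2)}\n\n"
        f"24-hour baseline averages: {json.dumps(baselines or {})}\n\n"
        "Compare the current metrics against the baseline. Reply with "
        "exactly 'OK' if everything looks normal; otherwise describe "
        "the anomaly."
    )
```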