feat: Implemented data storage and baselining

2025-08-18 12:49:51 -05:00
parent 3d74bf13f5
commit 9ac382e23e
3 changed files with 30 additions and 50 deletions

View File

@@ -55,9 +55,10 @@ The project will be composed of the following files:
- The loop will execute the data collection, analysis, and alerting steps periodically.
- The frequency of the monitoring loop will be configurable.
## 4. Data Storage and Baselining
- **4.1. Data Storage**: The agent will store historical monitoring data in a JSON file (`monitoring_data.json`).
- **4.2. Baselining**: The agent will calculate baseline averages for key metrics (e.g., RTT, packet loss) from the stored historical data. This baseline will be used by the LLM to improve anomaly detection accuracy.
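For illustration, a single record in `monitoring_data.json` might look like the sketch below. The field names are assumptions inferred from what the baselining code reads (`system_logs['timestamp']`, `network_metrics['round_trip_ms_avg']`, `network_metrics['packet_loss_percent']`); the real agent may record additional keys.

# Hypothetical example record (field names assumed for illustration, not taken from the agent's actual output).
example_record = {
    "system_logs": {
        "timestamp": "2025-08-18T12:49:51Z"  # ISO 8601; the trailing 'Z' is stripped before parsing
    },
    "network_metrics": {
        "packets_transmitted": 10,
        "packets_received": 9,
        "packet_loss_percent": 10.0,
        "round_trip_ms_avg": 23.4
    }
}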
## 5. Technical Requirements

View File

@@ -1,59 +1,36 @@
# Data Storage for the LLM-Powered Monitoring Agent
import json
import os
from datetime import datetime, timedelta

DATA_FILE = 'monitoring_data.json'


def load_data():
    """Loads all historical monitoring data from the JSON file."""
    if os.path.exists(DATA_FILE):
        with open(DATA_FILE, 'r') as f:
            try:
                return json.load(f)
            except json.JSONDecodeError:
                return []
    return []


def store_data(new_data):
    """Appends a new monitoring record to the JSON file."""
    data = load_data()
    data.append(new_data)
    with open(DATA_FILE, 'w') as f:
        json.dump(data, f, indent=4)


def calculate_baselines():
    """Calculates baseline averages for key network metrics (RTT, packet loss)."""
    data = load_data()
    if not data:
        return {}

    # For simplicity, average only the last 24 hours of data.
    # More complex logic can be added here.
    cutoff = datetime.now() - timedelta(hours=24)
    recent_data = [
        d for d in data
        if d.get('network_metrics')
        and datetime.fromisoformat(d['system_logs']['timestamp'].replace('Z', '')) > cutoff
    ]
    if not recent_data:
        return {}

    baseline_metrics = {
        'avg_rtt': sum(d['network_metrics']['round_trip_ms_avg'] for d in recent_data) / len(recent_data),
        'packet_loss': sum(d['network_metrics']['packet_loss_percent'] for d in recent_data) / len(recent_data),
    }
    return baseline_metrics
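A minimal usage sketch of the module, assuming it is saved as `data_storage.py` (as implied by the `data_storage.` calls in the main script) and that records follow the shape shown earlier; the metric values are made up.

# Minimal usage sketch; record values are hypothetical.
import data_storage
from datetime import datetime

record = {
    "system_logs": {"timestamp": datetime.now().isoformat()},  # parsed by calculate_baselines()
    "network_metrics": {
        "round_trip_ms_avg": 23.4,
        "packet_loss_percent": 10.0,
    },
}
data_storage.store_data(record)            # appends the record to monitoring_data.json
print(data_storage.calculate_baselines())  # e.g. {'avg_rtt': 23.4, 'packet_loss': 10.0}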

View File

@@ -145,6 +145,8 @@ if __name__ == "__main__":
        "network_metrics": network_metrics
    }
    data_storage.store_data(combined_data)
    llm_response = analyze_data_with_llm(combined_data, data_storage.calculate_baselines())
    if llm_response and llm_response != "OK":
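The body of `analyze_data_with_llm` is not part of this diff, so the following is only a hypothetical sketch of how the baselines returned by `calculate_baselines()` could be folded into the prompt. The helper name `build_prompt` and the wording are assumptions, not the project's actual implementation.

import json

# Hypothetical sketch only: injecting baseline averages into the LLM prompt.
def build_prompt(combined_data, baselines):
    if baselines:
        baseline_text = (f"Baseline over the last 24 hours: "
                         f"avg RTT {baselines['avg_rtt']:.1f} ms, "
                         f"avg packet loss {baselines['packet_loss']:.1f}%.")
    else:
        baseline_text = "No baseline available yet."
    return ("You are a network monitoring assistant. Compare the current sample "
            "against the baseline and reply 'OK' if nothing looks anomalous.\n"
            f"{baseline_text}\n"
            f"Current sample:\n{json.dumps(combined_data, indent=2)}")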