Completed NMAP & Refactoring

2025-08-20 15:16:21 -05:00
parent dd673829d2
commit 63ee043f34
7 changed files with 268 additions and 245 deletions
--- a/monitor_agent.py
+++ b/monitor_agent.py
@@ -19,6 +19,7 @@ import config
 from syslog_rfc5424_parser import parser

 LOG_POSITION_FILE = 'log_position.txt'
+AUTH_LOG_POSITION_FILE = 'auth_log_position.txt'

 # --- Data Ingestion & Parsing Functions ---

@@ -54,7 +55,6 @@ def get_system_logs():
        print(f"Error reading syslog: {e}")
        return {"syslog": []}

-import pingparsing

 def get_network_metrics():
    """Gets network metrics by pinging 8.8.8.8."""
@@ -69,45 +69,59 @@ def get_network_metrics():
        print(f"Error getting network metrics: {e}")
        return {"error": "ping command failed"}

-def get_cpu_temperature():
-    """Gets the CPU temperature using the sensors command."""
+def get_sensor_data():
+    """Gets all sensor data at once.
+
+    Runs the ``sensors`` command a single time and hands back its raw text so
+    callers can parse several readings from one invocation.
+
+    Returns:
+        The text printed by ``sensors``, or None if the command is missing
+        or exits with a non-zero status.
+    """
    try:
-        sensors_output = subprocess.check_output(["sensors"], text=True)
-        # Use regex to find the CPU temperature
-        match = re.search(r"Package id 0:\s+\+([\d\.]+)", sensors_output)
-        if match:
-            return {"cpu_temperature": float(match.group(1))}
-        else:
-            return {"cpu_temperature": "N/A"}
+        return subprocess.check_output(["sensors"], text=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
+        # NOTE(review): a non-zero exit (CalledProcessError) is reported with the
+        # same "not found" message as a missing binary.
        print("Error: 'sensors' command not found. Please install lm-sensors.")
+        return None
+
+def get_cpu_temperature(sensors_output):
+    """Gets the CPU temperature from the sensors output.
+
+    Args:
+        sensors_output: Raw text from the ``sensors`` command; may be None or
+            empty when the command failed.
+
+    Returns:
+        ``{"cpu_temperature": <float>}`` taken from the first "Package id 0:"
+        reading, or ``{"cpu_temperature": "N/A"}`` when there is no output or
+        no such reading.
+    """
+    if not sensors_output:
+        return {"cpu_temperature": "N/A"}
+    # Use regex to find the CPU temperature
+    # Only the digits/dots after the "+" sign are captured; any unit suffix
+    # that follows is left out of the match group.
+    match = re.search(r"Package id 0:\s+\+([\d\.]+)", sensors_output)
+    if match:
+        return {"cpu_temperature": float(match.group(1))}
+    else:
        return {"cpu_temperature": "N/A"}

-def get_gpu_temperature():
-    """Gets the GPU temperature using the sensors command."""
-    try:
-        sensors_output = subprocess.check_output(["sensors"], text=True)
-        # Use regex to find the GPU temperature for amdgpu
-        match = re.search(r"edge:\s+\+([\d\.]+)", sensors_output)
+def get_gpu_temperature(sensors_output):
+    """Gets the GPU temperature from the sensors output.
+
+    Args:
+        sensors_output: Raw text from the ``sensors`` command; may be None or
+            empty when the command failed.
+
+    Returns:
+        ``{"gpu_temperature": <float>}`` from the first "edge:" reading, else
+        from the first "temp1:" reading, or ``{"gpu_temperature": "N/A"}``
+        when there is no output or neither reading is present.
+    """
+    if not sensors_output:
+        return {"gpu_temperature": "N/A"}
+    # Use regex to find the GPU temperature for amdgpu
+    match = re.search(r"edge:\s+\+([\d\.]+)", sensors_output)
+    if match:
+        return {"gpu_temperature": float(match.group(1))}
+    else:
+        # if amdgpu not found, try radeon
+        # NOTE(review): this matches the first "temp1:" line anywhere in the
+        # output, which may belong to a non-GPU chip - confirm.
+        match = re.search(r"temp1:\s+\+([\d\.]+)", sensors_output)
        if match:
            return {"gpu_temperature": float(match.group(1))}
        else:
-            # if amdgpu not found, try radeon
-            match = re.search(r"temp1:\s+\+([\d\.]+)", sensors_output)
-            if match:
-                return {"gpu_temperature": float(match.group(1))}
-            else:
-                return {"gpu_temperature": "N/A"}
-    except (subprocess.CalledProcessError, FileNotFoundError):
-        print("Error: 'sensors' command not found. Please install lm-sensors.")
-        return {"gpu_temperature": "N/A"}
+            return {"gpu_temperature": "N/A"}
+
+

 def get_login_attempts():
-    """Gets system login attempts from /var/log/auth.log."""
+    """Gets system login attempts from /var/log/auth.log since the last check."""
    try:
+        last_position = 0
+        if os.path.exists(AUTH_LOG_POSITION_FILE):
+            with open(AUTH_LOG_POSITION_FILE, 'r') as f:
+                last_position = int(f.read())
+
        with open("/var/log/auth.log", "r") as f:
+            f.seek(last_position)
            log_lines = f.readlines()
-        
+            current_position = f.tell()
+
+        with open(AUTH_LOG_POSITION_FILE, 'w') as f:
+            f.write(str(current_position))
+
        failed_logins = []
        for line in log_lines:
            if "Failed password" in line:
@@ -122,7 +136,7 @@ def get_login_attempts():
        return {"failed_logins": []}

 def get_nmap_scan_results():
-    """Performs an Nmap scan and returns the results."""
+    """Performs an Nmap scan and returns a structured summary."""
    try:
        nm = nmap.PortScanner()
        scan_options = config.NMAP_SCAN_OPTIONS
@@ -131,47 +145,37 @@ def get_nmap_scan_results():
            scan_options = scan_options.replace("-sS", "-sT")

        scan_results = nm.scan(hosts=config.NMAP_TARGETS, arguments=scan_options)
-        return scan_results
+
+        # Process the results into a more structured format
+        processed_results = {"hosts": []}
+        if "scan" in scan_results:
+            for host, scan_data in scan_results["scan"].items():
+                host_info = {
+                    "ip": host,
+                    "status": scan_data.get("status", {}).get("state", "unknown"),
+                    "open_ports": []
+                }
+                if "tcp" in scan_data:
+                    for port, port_data in scan_data["tcp"].items():
+                        if port_data.get("state") == "open":
+                            host_info["open_ports"].append({
+                                "port": port,
+                                "service": port_data.get("name", ""),
+                                "product": port_data.get("product", ""),
+                                "version": port_data.get("version", "")
+                            })
+                processed_results["hosts"].append(host_info)
+
+        return processed_results
    except Exception as e:
        print(f"Error performing Nmap scan: {e}")
        return {"error": "Nmap scan failed"}

 # --- LLM Interaction Function ---

-def analyze_data_with_llm(data, baselines):
-    """Analyzes data with the local LLM."""
-    with open("CONSTRAINTS.md", "r") as f:
-        constraints = f.read()
-
-    with open("known_issues.json", "r") as f:
-        known_issues = json.load(f)
-
-    # Compare current nmap results with baseline
-    nmap_changes = {"new_hosts": [], "changed_ports": {}}
-    if "nmap_results" in data and "host_ports" in baselines:
-        current_hosts = set(data["nmap_results"].get("scan", {}).keys())
-        baseline_hosts = set(baselines["host_ports"].keys())
-
-        # New hosts
-        nmap_changes["new_hosts"] = sorted(list(current_hosts - baseline_hosts))
-
-        # Changed ports on existing hosts
-        for host in current_hosts.intersection(baseline_hosts):
-            current_ports = set()
-            if "tcp" in data["nmap_results"]["scan"][host]:
-                for port, port_data in data["nmap_results"]["scan"][host]["tcp"].items():
-                    if port_data["state"] == "open":
-                        current_ports.add(port)
-            
-            baseline_ports = set(baselines["host_ports"].get(host, []))
-
-            newly_opened = sorted(list(current_ports - baseline_ports))
-            newly_closed = sorted(list(baseline_ports - current_ports))
-
-            if newly_opened or newly_closed:
-                nmap_changes["changed_ports"][host] = {"opened": newly_opened, "closed": newly_closed}
-
-    prompt = f"""
+def build_llm_prompt(data, baselines, nmap_changes, constraints, known_issues):
+    """Builds the prompt for the LLM analysis."""
+    return f"""
    **Role:** You are a dedicated and expert system administrator. Your primary role is to identify anomalies and provide concise, actionable reports.

    **Instruction:** Analyze the following system and network data for any activity that appears out of place or different. Consider unusual values, errors, or unexpected patterns as anomalies. Compare the current data with the historical baseline data to identify significant deviations. Consult the known issues feed to avoid flagging resolved or expected issues. Pay special attention to the Nmap scan results for any new or unexpected open ports.
@@ -191,35 +195,64 @@ def analyze_data_with_llm(data, baselines):
    **Constraints and Guidelines:**
    {constraints}

-    **Output Request:** If you find an anomaly, provide a report as a single JSON object with two keys: "severity" and "reason". The "severity" must be one of "high", "medium", "low", or "none". The "reason" must be a natural language explanation of the anomaly. If no anomaly is found, return a single JSON object with "severity" set to "none" and "reason" as an empty string. Do not wrap the JSON in markdown or any other formatting.
+    **Output Request:** If you find an anomaly, provide a report as a single JSON object with two keys: "severity" and "reason". The "severity" must be one of "high", "medium", "low", or "none". The "reason" must be a natural language explanation of the anomaly. Please include specific values if the anomoly has them. If no anomaly is found, return a single JSON object with "severity" set to "none" and "reason" as an empty string. Do not wrap the JSON in markdown or any other formatting.

    **Reasoning Hint:** Think step by step to come to your conclusion. This is very important.
    """
+
+def analyze_data_with_llm(data, baselines):
+    """Analyzes data with the local LLM.
+
+    Reads CONSTRAINTS.md and known_issues.json, diffs the current Nmap
+    summary against ``baselines["host_ports"]``, builds the prompt and sends
+    it to the ``llama3.1:8b`` model through ollama.
+
+    Args:
+        data: Combined monitoring snapshot; the Nmap diff is only computed
+            when it contains an "nmap_results" key.
+        baselines: Baseline data; the Nmap diff is only computed when it
+            contains a "host_ports" key.
+
+    Returns:
+        The JSON object parsed from the model's reply, a
+        ``{"severity": "low", "reason": <raw reply>}`` fallback when the reply
+        holds no parsable JSON object, or None when any other error occurs
+        while querying the model.
+    """
+    # NOTE(review): both files are opened outside any try block, so a missing
+    # or malformed file raises out of this function.
+    with open("CONSTRAINTS.md", "r") as f:
+        constraints = f.read()
+
+    with open("known_issues.json", "r") as f:
+        known_issues = json.load(f)
+
+    # Compare current nmap results with baseline
+    nmap_changes = {"new_hosts": [], "changed_ports": {}}
+    if "nmap_results" in data and "host_ports" in baselines:
+        # Index the scanned hosts by IP so they can be set-compared with the baseline
+        current_hosts_info = {host['ip']: host for host in data["nmap_results"].get("hosts", [])}
+        current_hosts = set(current_hosts_info.keys())
+        baseline_hosts = set(baselines["host_ports"].keys())
+
+        # New hosts
+        nmap_changes["new_hosts"] = sorted(list(current_hosts - baseline_hosts))
+
+        # Changed ports on existing hosts
+        for host_ip in current_hosts.intersection(baseline_hosts):
+            current_ports = set(p['port'] for p in current_hosts_info[host_ip].get("open_ports", []))
+
+            # NOTE(review): ports are compared as-is; assumes the baseline stores
+            # them with the same type as the scan results - confirm.
+            baseline_ports = set(baselines["host_ports"].get(host_ip, []))
+
+            newly_opened = sorted(list(current_ports - baseline_ports))
+            newly_closed = sorted(list(baseline_ports - current_ports))
+
+            if newly_opened or newly_closed:
+                nmap_changes["changed_ports"][host_ip] = {"opened": newly_opened, "closed": newly_closed}
+
+    prompt = build_llm_prompt(data, baselines, nmap_changes, constraints, known_issues)
+
    try:
        response = ollama.generate(model="llama3.1:8b", prompt=prompt)
-        # Sanitize the response to ensure it's valid JSON
        sanitized_response = response['response'].strip()
-        # Find the first '{' and the last '}' to extract the JSON object
-        start_index = sanitized_response.find('{')
-        end_index = sanitized_response.rfind('}')
-        if start_index != -1 and end_index != -1:
-            json_string = sanitized_response[start_index:end_index+1]
-            try:
+        
+        # Extract JSON from the response
+        try:
+            # Find the first '{' and the last '}' to extract the JSON object
+            start_index = sanitized_response.find('{')
+            end_index = sanitized_response.rfind('}')
+            if start_index != -1 and end_index != -1:
+                json_string = sanitized_response[start_index:end_index+1]
                return json.loads(json_string)
-            except json.JSONDecodeError:
-                # If parsing a single object fails, try parsing as a list
-                try:
-                    json_list = json.loads(json_string)
-                    if isinstance(json_list, list) and json_list:
-                        return json_list[0] # Return the first object in the list
-                except json.JSONDecodeError as e:
-                    print(f"Error decoding LLM response: {e}")
-                    # Fallback for invalid JSON
-                    return {{"severity": "low", "reason": response['response'].strip()}} # type: ignore
-        else:
-            # Handle cases where the response is not valid JSON
-            print(f"LLM returned a non-JSON response: {sanitized_response}")
-            return {{"severity": "low", "reason": sanitized_response}} # type: ignore
+            else:
+                # Handle cases where the response is not valid JSON
+                print(f"LLM returned a non-JSON response: {sanitized_response}")
+                return {"severity": "low", "reason": sanitized_response}
+        except json.JSONDecodeError as e:
+            print(f"Error decoding LLM response: {e}")
+            # Fallback for invalid JSON
+            return {"severity": "low", "reason": sanitized_response}
+
    except Exception as e:
        print(f"Error interacting with LLM: {e}")
        return None
@@ -272,84 +305,68 @@ def send_google_home_alert(message):

 daily_events = []

-if __name__ == "__main__":
+def run_monitoring_cycle(nmap_scan_counter):
+    """Runs a single monitoring cycle.
+
+    Collects logs, network metrics, sensor temperatures and login attempts,
+    stores the combined snapshot, asks the LLM to analyze it and sends
+    alerts for high-severity findings.
+
+    Args:
+        nmap_scan_counter: Position in the 4-cycle Nmap schedule; a scan runs
+            only when this is 0.
+
+    Returns:
+        The counter value to pass to the next cycle.
+    """
+    print("Running monitoring cycle...")
+    system_logs = get_system_logs()
+    network_metrics = get_network_metrics()
+    # Run `sensors` once and let both temperature parsers share the output
+    sensors_output = get_sensor_data()
+    cpu_temp = get_cpu_temperature(sensors_output)
+    gpu_temp = get_gpu_temperature(sensors_output)
+    login_attempts = get_login_attempts()
+
+    nmap_results = None
+    if nmap_scan_counter == 0:
+        nmap_results = get_nmap_scan_results()
+    
+    nmap_scan_counter = (nmap_scan_counter + 1) % 4 # Run nmap scan every 4th cycle (20 minutes)
+
+    # Storage and analysis are skipped when either core data source is falsy
+    if system_logs and network_metrics:
+        combined_data = {
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+            "system_logs": system_logs,
+            "network_metrics": network_metrics,
+            "cpu_temperature": cpu_temp,
+            "gpu_temperature": gpu_temp,
+            "login_attempts": login_attempts
+        }
+
+        # The key is left out entirely on cycles where no scan ran
+        if nmap_results:
+            combined_data["nmap_results"] = nmap_results
+
+        data_storage.store_data(combined_data)
+
+        llm_response = analyze_data_with_llm(combined_data, data_storage.calculate_baselines())
+
+        # Every non-"none" finding is queued for the daily recap; only "high"
+        # findings trigger immediate alerts.
+        if llm_response and llm_response.get('severity') != "none":
+            daily_events.append(llm_response.get('reason'))
+            if llm_response.get('severity') == "high":
+                send_discord_alert(llm_response.get('reason'))
+                send_google_home_alert(llm_response.get('reason'))
+    return nmap_scan_counter
+
+def main():
+    """Main function to run the monitoring agent.
+
+    In test mode a single cycle is run (counter 0, so the Nmap scan is
+    included) and the function returns. Otherwise it loops forever: one
+    monitoring cycle, a daily-recap check, then a five-minute sleep.
+
+    The recap is sent only when the check lands in the exact
+    config.DAILY_RECAP_TIME minute ("%H:%M") and events have been recorded.
+    """
    if config.TEST_MODE:
        print("Running in test mode...")
-        system_logs = get_system_logs()
-        network_metrics = get_network_metrics()
-        cpu_temp = get_cpu_temperature()
-        gpu_temp = get_gpu_temperature()
-        login_attempts = get_login_attempts()
-        nmap_results = get_nmap_scan_results()
-
-        if system_logs and network_metrics:
-            combined_data = {
-                "timestamp": datetime.now(timezone.utc).isoformat(),
-                "system_logs": system_logs,
-                "network_metrics": network_metrics,
-                "cpu_temperature": cpu_temp,
-                "gpu_temperature": gpu_temp,
-                "login_attempts": login_attempts,
-                "nmap_results": nmap_results
-            }
-            data_storage.store_data(combined_data)
-
-            llm_response = analyze_data_with_llm(combined_data, data_storage.calculate_baselines())
-
-            if llm_response and llm_response.get('severity') != "none":
-                print(f"Anomaly detected: {llm_response.get('reason')}")
-                if llm_response.get('severity') == "high":
-                    send_discord_alert(llm_response.get('reason'))
-                    send_google_home_alert(llm_response.get('reason'))
-            else:
-                print("No anomaly detected.")
+        run_monitoring_cycle(0)
    else:
        nmap_scan_counter = 0
        while True:
-            print("Running monitoring cycle...")
-            system_logs = get_system_logs()
-            network_metrics = get_network_metrics()
-            cpu_temp = get_cpu_temperature()
-            gpu_temp = get_gpu_temperature()
-            login_attempts = get_login_attempts()
-
-            nmap_results = None
-            if nmap_scan_counter == 0:
-                nmap_results = get_nmap_scan_results()
-            
-            nmap_scan_counter = (nmap_scan_counter + 1) % 4 # Run nmap scan every 4th cycle (20 minutes)
-
-            if system_logs and network_metrics:
-                combined_data = {
-                    "timestamp": datetime.now(timezone.utc).isoformat(),
-                    "system_logs": system_logs,
-                    "network_metrics": network_metrics,
-                    "cpu_temperature": cpu_temp,
-                    "gpu_temperature": gpu_temp,
-                    "login_attempts": login_attempts
-                }
-
-                if nmap_results:
-                    combined_data["nmap_results"] = nmap_results
-
-                data_storage.store_data(combined_data)
-
-                llm_response = analyze_data_with_llm(combined_data, data_storage.calculate_baselines())
-
-                if llm_response and llm_response.get('severity') != "none":
-                    daily_events.append(llm_response.get('reason'))
-                    if llm_response.get('severity') == "high":
-                        send_discord_alert(llm_response.get('reason'))
-                        send_google_home_alert(llm_response.get('reason'))
+            nmap_scan_counter = run_monitoring_cycle(nmap_scan_counter)

            # Daily Recap Logic
            current_time = time.strftime("%H:%M")
            if current_time == config.DAILY_RECAP_TIME and daily_events:
                recap_message = "\n".join(daily_events)
                send_discord_alert(f"**Daily Recap:**\n{recap_message}")
-                daily_events = [] # Reset for the next day
+                # Clear in place: rebinding the name here would make daily_events
+                # local to main() and the check above would raise UnboundLocalError.
+                daily_events.clear() # Reset for the next day

            time.sleep(300) # Run every 5 minutes

+if __name__ == "__main__":
+    main()
+
+