Refactor: Integrate scripts into a single application (v1.2.0)

This commit is contained in:
2025-12-29 16:45:40 -06:00
parent 671741772f
commit 5bd154fb4e
6 changed files with 213 additions and 251 deletions

View File

@@ -1,89 +1,94 @@
import subprocess
import os
import sys
import argparse
import pandas as pd
from datetime import datetime
def run_script(command):
    """Run *command* as a subprocess and return the CompletedProcess.

    NOTE: output is captured and only printed after the command finishes —
    it is NOT streamed in real time (capture_output=True buffers it). The
    previous docstring claimed real-time printing, which was incorrect.

    Args:
        command: argv-style list, e.g. ["python", "script.py", arg].

    Returns:
        subprocess.CompletedProcess on success.

    Exits:
        Status 1 if the executable is missing (FileNotFoundError) or the
        command returns a non-zero exit code (CalledProcessError); the
        error and captured stderr are written to sys.stderr first.
    """
    print(f"Executing: {' '.join(command)}", flush=True)
    try:
        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
        )
    except FileNotFoundError as e:
        print(f"\nError: Command not found. Ensure Python is in your PATH. Details: {e}", file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"\nError running command: {' '.join(command)}", file=sys.stderr)
        print("\n--- STDERR ---", file=sys.stderr)
        print(e.stderr, file=sys.stderr)
        sys.exit(1)
    print("✓ Success!")
    if process.stdout:
        print(process.stdout)
    return process
# Import the refactored functions from other scripts
from process_territories import process_data
from analysis import generate_analysis_artifacts
from category_analysis import generate_category_map
# NOTE(review): this span is a unified diff rendered without its +/- markers,
# so the removed subprocess-based process_files() and its in-memory
# replacement process_and_save() are interleaved line-by-line below.
# Recover one side from the original commit before treating this as code.
def process_files(addresses_file, boundaries_file):
def process_and_save(addresses_file, boundaries_file):
"""
Runs the processing script to generate the 'Final' CSV.
Returns the path to the generated file or exits on error.
Runs the processing script and saves the result to a 'Final' CSV.
Returns the path to the generated file.
"""
# NOTE(review): the docstring above mixes the old summary (first two lines)
# with the new one (last two lines).
print("\n--- Step 1: Processing territory files ---")
# Check if input files exist
# NOTE(review): the existence checks, run_script() invocation and the
# "find latest Final.csv" scan below appear to belong to the removed
# process_files() implementation.
if not os.path.exists(addresses_file):
print(f"Error: Address file not found at '{addresses_file}'", file=sys.stderr)
sys.exit(1)
if not os.path.exists(boundaries_file):
print(f"Error: Boundaries file not found at '{boundaries_file}'", file=sys.stderr)
sys.exit(1)
process_command = ["python", "process_territories.py", addresses_file, boundaries_file]
run_script(process_command)
# Find the most recently modified "Final.csv"
try:
final_files = [f for f in os.listdir('.') if "Final.csv" in f and os.path.isfile(f)]
if not final_files:
print("Error: No 'Final.csv' file found after processing.", file=sys.stderr)
sys.exit(1)
latest_file = max(final_files, key=os.path.getmtime)
print(f"Generated file: {latest_file}")
return latest_file
except Exception as e:
print(f"Error locating processed file: {e}", file=sys.stderr)
# Process data in memory
# NOTE(review): from here down are the new-version lines — process_data()
# builds a DataFrame in memory, which is then written to a dated CSV.
processed_df = process_data(addresses_file, boundaries_file)
# Save the processed DataFrame to a CSV file
date_str = datetime.now().strftime('%b %Y')
output_filename = f'Okinawa Territory {date_str} - Final.csv'
processed_df.to_csv(output_filename, index=False)
print(f"✓ Success! Generated file: {output_filename}")
return output_filename
# NOTE(review): catching (FileNotFoundError, Exception) is redundant —
# Exception already matches FileNotFoundError; 'except Exception' suffices.
except (FileNotFoundError, Exception) as e:
print(f"\nError during file processing: {e}", file=sys.stderr)
sys.exit(1)
# NOTE(review): like the block above, this span interleaves the removed
# run_analysis() (which shelled out to analysis.py / category_analysis.py
# via run_script) with the new analyze_from_file() and full_run_in_memory()
# (which call the imported functions directly on a DataFrame).
def run_analysis(processed_file_path):
def analyze_from_file(processed_file_path):
"""
Runs the analysis scripts on the processed file.
Reads a processed file and runs the analysis scripts on it.
"""
# NOTE(review): the docstring mixes the old summary (first line) and the
# new one (second line).
if not processed_file_path or not os.path.exists(processed_file_path):
print(f"\nError: Processed file not found at '{processed_file_path}'. Please run the 'process' step first.", file=sys.stderr)
print("\n--- Step 2: Running analysis from file ---")
try:
# Read the processed file into a DataFrame
df = pd.read_csv(processed_file_path)
# Run the analysis functions
generate_analysis_artifacts(df)
generate_category_map(df)
print("\n✓ Analysis complete!")
print("Generated files: analysis.md, map.html, category_map.html")
except FileNotFoundError as e:
print(f"\nError: Processed file not found at '{processed_file_path}'.", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(f"\nAn unexpected error occurred during analysis: {e}", file=sys.stderr)
sys.exit(1)
# NOTE(review): full_run_in_memory() below is new in this commit — it runs
# process_data() and both analysis functions without writing/reading a CSV.
def full_run_in_memory(addresses_file, boundaries_file):
"""
Processes data and runs analysis entirely in memory.
"""
print("\n--- Running full pipeline in memory ---")
try:
# Step 1: Process data
print("Processing data...")
processed_df = process_data(addresses_file, boundaries_file)
print("✓ Data processing complete.")
# Step 2: Run analysis
print("\nRunning analysis...")
generate_analysis_artifacts(processed_df)
generate_category_map(processed_df)
# NOTE(review): the subprocess commands below are removed-side lines from
# the old run_analysis(); they do not belong inside full_run_in_memory().
print("\n--- Step 2: Running analysis scripts ---")
analysis_command = ["python", "analysis.py", processed_file_path]
run_script(analysis_command)
category_analysis_command = ["python", "category_analysis.py", processed_file_path]
run_script(category_analysis_command)
print("\nAnalysis complete!")
print("Generated files: analysis.md, map.html, category_map.html")
print("\n✓ Analysis complete!")
print("Generated files: analysis.md, map.html, category_map.html")
# NOTE(review): (FileNotFoundError, Exception) is redundant — Exception
# already matches FileNotFoundError.
except (FileNotFoundError, Exception) as e:
print(f"\nAn error occurred during the full run: {e}", file=sys.stderr)
sys.exit(1)
def main():
"""Parses command-line arguments and orchestrates the workflow."""
# NOTE(review): paired lines below are old-version vs new-version variants
# of the same statement (the diff's +/- markers were lost in rendering).
parser = argparse.ArgumentParser(description="Territory Analysis Tool")
parser = argparse.ArgumentParser(
description="Territory Analysis Tool v1.2.0",
formatter_class=argparse.RawTextHelpFormatter
)
subparsers = parser.add_subparsers(dest="command", required=True, help="Available commands")
# Sub-command for 'process'
parser_process = subparsers.add_parser("process", help="Step 1: Process raw address and boundary files into a final CSV.")
parser_process = subparsers.add_parser("process", help="Step 1: Process raw files and save the result to a CSV.")
parser_process.add_argument("--addresses", required=True, help="Path to the addresses CSV file.")
parser_process.add_argument("--boundaries", required=True, help="Path to the boundaries CSV file.")
# NOTE(review): the line below is a diff hunk header — the unchanged lines
# between hunks (including the creation of parser_analyze via
# subparsers.add_parser("analyze", ...)) are not shown in this view.
@@ -92,26 +97,22 @@ def main():
parser_analyze.add_argument("--input", required=True, help="Path to the processed 'Final' CSV file.")
# Sub-command for 'full-run'
parser_full_run = subparsers.add_parser("full-run", help="Run the full pipeline: process and then analyze.")
parser_full_run = subparsers.add_parser("full-run", help="Run the full pipeline (process and analyze) in memory.")
parser_full_run.add_argument("--addresses", required=True, help="Path to the addresses CSV file.")
parser_full_run.add_argument("--boundaries", required=True, help="Path to the boundaries CSV file.")
args = parser.parse_args()
if args.command == "process":
process_files(args.addresses, args.boundaries)
process_and_save(args.addresses, args.boundaries)
elif args.command == "analyze":
run_analysis(args.input)
analyze_from_file(args.input)
elif args.command == "full-run":
# Run step 1
processed_file = process_files(args.addresses, args.boundaries)
# Run step 2
run_analysis(processed_file)
# NOTE(review): new version replaces the two-step file round-trip above
# with a single in-memory pipeline call:
full_run_in_memory(args.addresses, args.boundaries)
if __name__ == "__main__":
    # Run from the script's own directory so relative paths (input CSVs,
    # generated reports) resolve the same way no matter where the user
    # invoked the tool from.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)
    main()