Refactor: Integrate scripts into a single application (v1.2.0)

This commit is contained in:
2025-12-29 16:45:40 -06:00
parent 671741772f
commit 5bd154fb4e
6 changed files with 213 additions and 251 deletions

View File

@@ -1,89 +1,94 @@
import subprocess
import os
import sys
import argparse
import pandas as pd
from datetime import datetime
def run_script(command):
    """Run *command* as a subprocess and return the CompletedProcess.

    NOTE: output is captured and only printed after the command finishes —
    it is NOT streamed in real time (capture_output=True buffers it). The
    previous docstring claimed real-time printing, which was incorrect.

    Args:
        command: argv-style list, e.g. ["python", "script.py", arg].

    Returns:
        subprocess.CompletedProcess on success.

    Exits:
        Status 1 if the executable is missing (FileNotFoundError) or the
        command returns a non-zero exit code (CalledProcessError); the
        error and captured stderr are written to sys.stderr first.
    """
    print(f"Executing: {' '.join(command)}", flush=True)
    try:
        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
        )
    except FileNotFoundError as e:
        print(f"\nError: Command not found. Ensure Python is in your PATH. Details: {e}", file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"\nError running command: {' '.join(command)}", file=sys.stderr)
        print("\n--- STDERR ---", file=sys.stderr)
        print(e.stderr, file=sys.stderr)
        sys.exit(1)
    print("✓ Success!")
    if process.stdout:
        print(process.stdout)
    return process
# Import the refactored functions from other scripts
from process_territories import process_data
from analysis import generate_analysis_artifacts
from category_analysis import generate_category_map
# NOTE(review): this span is a unified diff rendered without its +/- markers,
# so the removed subprocess-based process_files() and its in-memory
# replacement process_and_save() are interleaved line-by-line below.
# Recover one side from the original commit before treating this as code.
def process_files(addresses_file, boundaries_file):
def process_and_save(addresses_file, boundaries_file):
"""
Runs the processing script to generate the 'Final' CSV.
Returns the path to the generated file or exits on error.
Runs the processing script and saves the result to a 'Final' CSV.
Returns the path to the generated file.
"""
# NOTE(review): the docstring above mixes the old summary (first two lines)
# with the new one (last two lines).
print("\n--- Step 1: Processing territory files ---")
# Check if input files exist
# NOTE(review): the existence checks, run_script() invocation and the
# "find latest Final.csv" scan below appear to belong to the removed
# process_files() implementation.
if not os.path.exists(addresses_file):
print(f"Error: Address file not found at '{addresses_file}'", file=sys.stderr)
sys.exit(1)
if not os.path.exists(boundaries_file):
print(f"Error: Boundaries file not found at '{boundaries_file}'", file=sys.stderr)
sys.exit(1)
process_command = ["python", "process_territories.py", addresses_file, boundaries_file]
run_script(process_command)
# Find the most recently modified "Final.csv"
try:
final_files = [f for f in os.listdir('.') if "Final.csv" in f and os.path.isfile(f)]
if not final_files:
print("Error: No 'Final.csv' file found after processing.", file=sys.stderr)
sys.exit(1)
latest_file = max(final_files, key=os.path.getmtime)
print(f"Generated file: {latest_file}")
return latest_file
except Exception as e:
print(f"Error locating processed file: {e}", file=sys.stderr)
# Process data in memory
# NOTE(review): from here down are the new-version lines — process_data()
# builds a DataFrame in memory, which is then written to a dated CSV.
processed_df = process_data(addresses_file, boundaries_file)
# Save the processed DataFrame to a CSV file
date_str = datetime.now().strftime('%b %Y')
output_filename = f'Okinawa Territory {date_str} - Final.csv'
processed_df.to_csv(output_filename, index=False)
print(f"✓ Success! Generated file: {output_filename}")
return output_filename
# NOTE(review): catching (FileNotFoundError, Exception) is redundant —
# Exception already matches FileNotFoundError; 'except Exception' suffices.
except (FileNotFoundError, Exception) as e:
print(f"\nError during file processing: {e}", file=sys.stderr)
sys.exit(1)
# NOTE(review): like the block above, this span interleaves the removed
# run_analysis() (which shelled out to analysis.py / category_analysis.py
# via run_script) with the new analyze_from_file() and full_run_in_memory()
# (which call the imported functions directly on a DataFrame).
def run_analysis(processed_file_path):
def analyze_from_file(processed_file_path):
"""
Runs the analysis scripts on the processed file.
Reads a processed file and runs the analysis scripts on it.
"""
# NOTE(review): the docstring mixes the old summary (first line) and the
# new one (second line).
if not processed_file_path or not os.path.exists(processed_file_path):
print(f"\nError: Processed file not found at '{processed_file_path}'. Please run the 'process' step first.", file=sys.stderr)
print("\n--- Step 2: Running analysis from file ---")
try:
# Read the processed file into a DataFrame
df = pd.read_csv(processed_file_path)
# Run the analysis functions
generate_analysis_artifacts(df)
generate_category_map(df)
print("\n✓ Analysis complete!")
print("Generated files: analysis.md, map.html, category_map.html")
except FileNotFoundError as e:
print(f"\nError: Processed file not found at '{processed_file_path}'.", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(f"\nAn unexpected error occurred during analysis: {e}", file=sys.stderr)
sys.exit(1)
# NOTE(review): full_run_in_memory() below is new in this commit — it runs
# process_data() and both analysis functions without writing/reading a CSV.
def full_run_in_memory(addresses_file, boundaries_file):
"""
Processes data and runs analysis entirely in memory.
"""
print("\n--- Running full pipeline in memory ---")
try:
# Step 1: Process data
print("Processing data...")
processed_df = process_data(addresses_file, boundaries_file)
print("✓ Data processing complete.")
# Step 2: Run analysis
print("\nRunning analysis...")
generate_analysis_artifacts(processed_df)
generate_category_map(processed_df)
# NOTE(review): the subprocess commands below are removed-side lines from
# the old run_analysis(); they do not belong inside full_run_in_memory().
print("\n--- Step 2: Running analysis scripts ---")
analysis_command = ["python", "analysis.py", processed_file_path]
run_script(analysis_command)
category_analysis_command = ["python", "category_analysis.py", processed_file_path]
run_script(category_analysis_command)
print("\nAnalysis complete!")
print("Generated files: analysis.md, map.html, category_map.html")
print("\n✓ Analysis complete!")
print("Generated files: analysis.md, map.html, category_map.html")
# NOTE(review): (FileNotFoundError, Exception) is redundant — Exception
# already matches FileNotFoundError.
except (FileNotFoundError, Exception) as e:
print(f"\nAn error occurred during the full run: {e}", file=sys.stderr)
sys.exit(1)
def main():
"""Parses command-line arguments and orchestrates the workflow."""
# NOTE(review): paired lines below are old-version vs new-version variants
# of the same statement (the diff's +/- markers were lost in rendering).
parser = argparse.ArgumentParser(description="Territory Analysis Tool")
parser = argparse.ArgumentParser(
description="Territory Analysis Tool v1.2.0",
formatter_class=argparse.RawTextHelpFormatter
)
subparsers = parser.add_subparsers(dest="command", required=True, help="Available commands")
# Sub-command for 'process'
parser_process = subparsers.add_parser("process", help="Step 1: Process raw address and boundary files into a final CSV.")
parser_process = subparsers.add_parser("process", help="Step 1: Process raw files and save the result to a CSV.")
parser_process.add_argument("--addresses", required=True, help="Path to the addresses CSV file.")
parser_process.add_argument("--boundaries", required=True, help="Path to the boundaries CSV file.")
# NOTE(review): the line below is a diff hunk header — the unchanged lines
# between hunks (including the creation of parser_analyze via
# subparsers.add_parser("analyze", ...)) are not shown in this view.
@@ -92,26 +97,22 @@ def main():
parser_analyze.add_argument("--input", required=True, help="Path to the processed 'Final' CSV file.")
# Sub-command for 'full-run'
parser_full_run = subparsers.add_parser("full-run", help="Run the full pipeline: process and then analyze.")
parser_full_run = subparsers.add_parser("full-run", help="Run the full pipeline (process and analyze) in memory.")
parser_full_run.add_argument("--addresses", required=True, help="Path to the addresses CSV file.")
parser_full_run.add_argument("--boundaries", required=True, help="Path to the boundaries CSV file.")
args = parser.parse_args()
if args.command == "process":
process_files(args.addresses, args.boundaries)
process_and_save(args.addresses, args.boundaries)
elif args.command == "analyze":
run_analysis(args.input)
analyze_from_file(args.input)
elif args.command == "full-run":
# Run step 1
processed_file = process_files(args.addresses, args.boundaries)
# Run step 2
run_analysis(processed_file)
# NOTE(review): new version replaces the two-step file round-trip above
# with a single in-memory pipeline call:
full_run_in_memory(args.addresses, args.boundaries)
if __name__ == "__main__":
    # Run from the script's own directory so relative paths (input CSVs,
    # generated reports) resolve the same way no matter where the user
    # invoked the tool from.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)
    main()