Refactor: Integrate scripts into a single application (v1.2.0)
This commit is contained in:
147
run_all.py
147
run_all.py
@@ -1,89 +1,94 @@
|
||||
|
||||
import subprocess
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
def run_script(command):
    """Run an external command, echo its captured output, and exit on failure.

    The command's output is captured and printed only after the process has
    finished; it is not streamed while the command is running.

    Args:
        command: The program and its arguments as a list of strings.

    Returns:
        The ``subprocess.CompletedProcess`` of the successful run.

    Raises:
        SystemExit: With status 1 when the executable cannot be found or the
            command exits with a non-zero status.
    """
    print(f"Executing: {' '.join(command)}", flush=True)
    try:
        process = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
        )
    except FileNotFoundError as e:
        print(f"\nError: Command not found. Ensure Python is in your PATH. Details: {e}", file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"\nError running command: {' '.join(command)}", file=sys.stderr)
        if e.stdout:
            # Captured stdout would otherwise be lost, and a failing script
            # may have reported the reason for the failure there.
            print("\n--- STDOUT ---", file=sys.stderr)
            print(e.stdout, file=sys.stderr)
        print("\n--- STDERR ---", file=sys.stderr)
        print(e.stderr, file=sys.stderr)
        sys.exit(1)

    print("✓ Success!")
    if process.stdout:
        print(process.stdout)
    return process
|
||||
# Import the refactored functions from other scripts
|
||||
from process_territories import process_data
|
||||
from analysis import generate_analysis_artifacts
|
||||
from category_analysis import generate_category_map
|
||||
|
||||
def process_files(addresses_file, boundaries_file):
    """
    Runs the processing script to generate the 'Final' CSV.
    Returns the path to the generated file or exits on error.

    Args:
        addresses_file: Path to the addresses CSV file.
        boundaries_file: Path to the boundaries CSV file.

    Raises:
        SystemExit: With status 1 when an input file is missing, the
            processing script fails, or no 'Final.csv' file can be located.
    """
    print("\n--- Step 1: Processing territory files ---")

    # Check if input files exist
    if not os.path.exists(addresses_file):
        print(f"Error: Address file not found at '{addresses_file}'", file=sys.stderr)
        sys.exit(1)
    if not os.path.exists(boundaries_file):
        print(f"Error: Boundaries file not found at '{boundaries_file}'", file=sys.stderr)
        sys.exit(1)

    # Launch the child with the interpreter running this script, so it does
    # not depend on whatever "python" happens to resolve to on PATH.
    process_command = [sys.executable, "process_territories.py", addresses_file, boundaries_file]
    run_script(process_command)

    # Find the most recently modified "Final.csv"
    try:
        final_files = [f for f in os.listdir('.') if "Final.csv" in f and os.path.isfile(f)]
        if not final_files:
            print("Error: No 'Final.csv' file found after processing.", file=sys.stderr)
            sys.exit(1)
        latest_file = max(final_files, key=os.path.getmtime)
        print(f"Generated file: {latest_file}")
        return latest_file
    except OSError as e:
        # Directory listing and mtime lookups fail with OSError.
        print(f"Error locating processed file: {e}", file=sys.stderr)
        sys.exit(1)


def process_and_save(addresses_file, boundaries_file):
    """
    Runs the processing step and saves the result to a 'Final' CSV.
    Returns the path to the generated file.

    Args:
        addresses_file: Path to the addresses CSV file.
        boundaries_file: Path to the boundaries CSV file.

    Raises:
        SystemExit: With status 1 when processing or saving fails.
    """
    print("\n--- Step 1: Processing territory files ---")
    try:
        # Process data in memory
        processed_df = process_data(addresses_file, boundaries_file)

        # Save the processed DataFrame to a CSV file named after the
        # current month and year.
        date_str = datetime.now().strftime('%b %Y')
        output_filename = f'Okinawa Territory {date_str} - Final.csv'

        processed_df.to_csv(output_filename, index=False)
        print(f"✓ Success! Generated file: {output_filename}")
        return output_filename

    except Exception as e:
        # Command-line boundary: report any failure and stop with status 1.
        print(f"\nError during file processing: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
def run_analysis(processed_file_path):
    """
    Runs the analysis scripts on the processed file.

    Args:
        processed_file_path: Path to the processed 'Final' CSV file.

    Raises:
        SystemExit: With status 1 when the processed file is missing or one
            of the analysis scripts fails.
    """
    if not processed_file_path or not os.path.exists(processed_file_path):
        print(f"\nError: Processed file not found at '{processed_file_path}'. Please run the 'process' step first.", file=sys.stderr)
        sys.exit(1)

    print("\n--- Step 2: Running analysis scripts ---")

    # Launch the children with the interpreter running this script, so they
    # do not depend on whatever "python" happens to resolve to on PATH.
    analysis_command = [sys.executable, "analysis.py", processed_file_path]
    run_script(analysis_command)

    category_analysis_command = [sys.executable, "category_analysis.py", processed_file_path]
    run_script(category_analysis_command)

    print("\nAnalysis complete!")
    print("Generated files: analysis.md, map.html, category_map.html")


def analyze_from_file(processed_file_path):
    """
    Reads a processed file and runs the analysis functions on it.

    Args:
        processed_file_path: Path to the processed 'Final' CSV file.

    Raises:
        SystemExit: With status 1 when the file is missing or the analysis
            fails.
    """
    print("\n--- Step 2: Running analysis from file ---")
    try:
        # Read the processed file into a DataFrame
        df = pd.read_csv(processed_file_path)

        # Run the analysis functions
        generate_analysis_artifacts(df)
        generate_category_map(df)

        print("\n✓ Analysis complete!")
        print("Generated files: analysis.md, map.html, category_map.html")

    except FileNotFoundError:
        print(f"\nError: Processed file not found at '{processed_file_path}'.", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Command-line boundary: report any other failure and stop.
        print(f"\nAn unexpected error occurred during analysis: {e}", file=sys.stderr)
        sys.exit(1)


def full_run_in_memory(addresses_file, boundaries_file):
    """
    Processes data and runs analysis, passing the DataFrame directly between
    the two steps instead of writing an intermediate CSV.

    Args:
        addresses_file: Path to the addresses CSV file.
        boundaries_file: Path to the boundaries CSV file.

    Raises:
        SystemExit: With status 1 when any step fails.
    """
    print("\n--- Running full pipeline in memory ---")
    try:
        # Step 1: Process data
        print("Processing data...")
        processed_df = process_data(addresses_file, boundaries_file)
        print("✓ Data processing complete.")

        # Step 2: Run analysis
        print("\nRunning analysis...")
        generate_analysis_artifacts(processed_df)
        generate_category_map(processed_df)

        print("\n✓ Analysis complete!")
        print("Generated files: analysis.md, map.html, category_map.html")

    except Exception as e:
        # Command-line boundary: report any failure and stop with status 1.
        print(f"\nAn error occurred during the full run: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
def main():
|
||||
"""Parses command-line arguments and orchestrates the workflow."""
|
||||
parser = argparse.ArgumentParser(description="Territory Analysis Tool")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Territory Analysis Tool v1.2.0",
|
||||
formatter_class=argparse.RawTextHelpFormatter
|
||||
)
|
||||
subparsers = parser.add_subparsers(dest="command", required=True, help="Available commands")
|
||||
|
||||
# Sub-command for 'process'
|
||||
parser_process = subparsers.add_parser("process", help="Step 1: Process raw address and boundary files into a final CSV.")
|
||||
parser_process = subparsers.add_parser("process", help="Step 1: Process raw files and save the result to a CSV.")
|
||||
parser_process.add_argument("--addresses", required=True, help="Path to the addresses CSV file.")
|
||||
parser_process.add_argument("--boundaries", required=True, help="Path to the boundaries CSV file.")
|
||||
|
||||
@@ -92,26 +97,22 @@ def main():
|
||||
parser_analyze.add_argument("--input", required=True, help="Path to the processed 'Final' CSV file.")
|
||||
|
||||
# Sub-command for 'full-run'
|
||||
parser_full_run = subparsers.add_parser("full-run", help="Run the full pipeline: process and then analyze.")
|
||||
parser_full_run = subparsers.add_parser("full-run", help="Run the full pipeline (process and analyze) in memory.")
|
||||
parser_full_run.add_argument("--addresses", required=True, help="Path to the addresses CSV file.")
|
||||
parser_full_run.add_argument("--boundaries", required=True, help="Path to the boundaries CSV file.")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.command == "process":
|
||||
process_files(args.addresses, args.boundaries)
|
||||
process_and_save(args.addresses, args.boundaries)
|
||||
|
||||
elif args.command == "analyze":
|
||||
run_analysis(args.input)
|
||||
analyze_from_file(args.input)
|
||||
|
||||
elif args.command == "full-run":
|
||||
# Run step 1
|
||||
processed_file = process_files(args.addresses, args.boundaries)
|
||||
# Run step 2
|
||||
run_analysis(processed_file)
|
||||
full_run_in_memory(args.addresses, args.boundaries)
|
||||
|
||||
if __name__ == "__main__":
    # Run from the script's own directory so that relative file paths are
    # resolved against the script's location.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)
    main()
|
||||
|
||||
Reference in New Issue
Block a user