Refactor: Integrate scripts into a single application (v1.2.0)

2025-12-29 16:45:40 -06:00
parent 671741772f
commit 5bd154fb4e
6 changed files with 213 additions and 251 deletions
--- a/process_territories.py
+++ b/process_territories.py
@@ -1,53 +1,26 @@
-import csv
-import argparse
+import pandas as pd
 from datetime import datetime

-def process_territories(addresses_file, boundaries_file, final_file):
-    # Read the addresses and count occurrences of each TerritoryID
-    address_counts = {}
-    with open(addresses_file, 'r', encoding='utf-8-sig') as f:
-        reader = csv.DictReader(f)
-        for row in reader:
-            territory_id = row['TerritoryID']
-            if territory_id:
-                address_counts[territory_id] = address_counts.get(territory_id, 0) + 1
+def process_data(addresses_file, boundaries_file):
+    """
+    Reads address and boundary CSVs, merges them, and returns a consolidated DataFrame.
+    """
+    try:
+        # Read the addresses and count occurrences of each TerritoryID
+        address_counts = pd.read_csv(addresses_file).groupby('TerritoryID').size().reset_index(name='Address Count')

-    # Read the boundaries file and write to the final file
-    with open(boundaries_file, 'r', encoding='utf-8-sig') as f_in, \
-         open(final_file, 'w', newline='', encoding='utf-8') as f_out:
-        
-        reader = csv.DictReader(f_in)
-        
-        # Define the headers for the output file
-        fieldnames = ['TerritoryID', 'CategoryCode', 'Number', 'Area', 'Boundary', 'Address Count']
-        writer = csv.DictWriter(f_out, fieldnames=fieldnames)
-        writer.writeheader()
+        # Read the boundaries file
+        boundaries_df = pd.read_csv(boundaries_file)

-        for row in reader:
-            territory_id = row['TerritoryID']
-            
-            # Get the address count for the current territory
-            address_count = address_counts.get(territory_id, 0)
+        # Merge the address counts with the boundaries data
+        merged_df = pd.merge(boundaries_df, address_counts, on='TerritoryID', how='left')

-            # Write the new row to the final file
-            writer.writerow({
-                'TerritoryID': territory_id,
-                'CategoryCode': row.get('CategoryCode', ''),
-                'Number': row.get('Number', ''),
-                'Area': row.get('Area', ''),
-                'Boundary': row.get('Boundary', ''),
-                'Address Count': address_count
-            })
+        # Fill missing address counts with 0 and ensure the column is integer type
+        merged_df['Address Count'] = merged_df['Address Count'].fillna(0).astype(int)

-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Process territory data.')
-    parser.add_argument('addresses_file', help='The path to the addresses CSV file.')
-    parser.add_argument('boundaries_file', help='The path to the boundaries CSV file.')
-    args = parser.parse_args()
+        return merged_df

-    # Generate the output file name
-    date_str = datetime.now().strftime('%b %Y')
-    output_file = f'Okinawa Territory {date_str} - Final.csv'
-
-    process_territories(args.addresses_file, args.boundaries_file, output_file)
-    print(f"Processing complete. Output written to {output_file}")
+    except FileNotFoundError as e:
+        raise FileNotFoundError(f"Error during data processing: {e}")
+    except Exception as e:
+        raise Exception(f"An unexpected error occurred during data processing: {e}")