Refactor: Integrate scripts into a single application (v1.2.0)
This commit is contained in:
@@ -1,53 +1,26 @@
|
||||
import csv
|
||||
import argparse
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
def process_territories(addresses_file, boundaries_file, final_file):
|
||||
# Read the addresses and count occurrences of each TerritoryID
|
||||
address_counts = {}
|
||||
with open(addresses_file, 'r', encoding='utf-8-sig') as f:
|
||||
reader = csv.DictReader(f)
|
||||
for row in reader:
|
||||
territory_id = row['TerritoryID']
|
||||
if territory_id:
|
||||
address_counts[territory_id] = address_counts.get(territory_id, 0) + 1
|
||||
def process_data(addresses_file, boundaries_file):
|
||||
"""
|
||||
Reads address and boundary CSVs, merges them, and returns a consolidated DataFrame.
|
||||
"""
|
||||
try:
|
||||
# Read the addresses and count occurrences of each TerritoryID
|
||||
address_counts = pd.read_csv(addresses_file).groupby('TerritoryID').size().reset_index(name='Address Count')
|
||||
|
||||
# Read the boundaries file and write to the final file
|
||||
with open(boundaries_file, 'r', encoding='utf-8-sig') as f_in, \
|
||||
open(final_file, 'w', newline='', encoding='utf-8') as f_out:
|
||||
|
||||
reader = csv.DictReader(f_in)
|
||||
|
||||
# Define the headers for the output file
|
||||
fieldnames = ['TerritoryID', 'CategoryCode', 'Number', 'Area', 'Boundary', 'Address Count']
|
||||
writer = csv.DictWriter(f_out, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
# Read the boundaries file
|
||||
boundaries_df = pd.read_csv(boundaries_file)
|
||||
|
||||
for row in reader:
|
||||
territory_id = row['TerritoryID']
|
||||
|
||||
# Get the address count for the current territory
|
||||
address_count = address_counts.get(territory_id, 0)
|
||||
# Merge the address counts with the boundaries data
|
||||
merged_df = pd.merge(boundaries_df, address_counts, on='TerritoryID', how='left')
|
||||
|
||||
# Write the new row to the final file
|
||||
writer.writerow({
|
||||
'TerritoryID': territory_id,
|
||||
'CategoryCode': row.get('CategoryCode', ''),
|
||||
'Number': row.get('Number', ''),
|
||||
'Area': row.get('Area', ''),
|
||||
'Boundary': row.get('Boundary', ''),
|
||||
'Address Count': address_count
|
||||
})
|
||||
# Fill missing address counts with 0 and ensure the column is integer type
|
||||
merged_df['Address Count'] = merged_df['Address Count'].fillna(0).astype(int)
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='Process territory data.')
|
||||
parser.add_argument('addresses_file', help='The path to the addresses CSV file.')
|
||||
parser.add_argument('boundaries_file', help='The path to the boundaries CSV file.')
|
||||
args = parser.parse_args()
|
||||
return merged_df
|
||||
|
||||
# Generate the output file name
|
||||
date_str = datetime.now().strftime('%b %Y')
|
||||
output_file = f'Okinawa Territory {date_str} - Final.csv'
|
||||
|
||||
process_territories(args.addresses_file, args.boundaries_file, output_file)
|
||||
print(f"Processing complete. Output written to {output_file}")
|
||||
except FileNotFoundError as e:
|
||||
raise FileNotFoundError(f"Error during data processing: {e}")
|
||||
except Exception as e:
|
||||
raise Exception(f"An unexpected error occurred during data processing: {e}")
|
||||
Reference in New Issue
Block a user