"""Assign each address to the territory whose boundary polygon contains it.

Reads territory boundaries from ``TerritoryExport.csv`` and address
coordinates from ``Addrsses.csv``, overwrites the first column of every
address row with the matching ``TerritoryID`` (or ``OUTSIDE_TERRITORY`` when
no polygon contains the point), and writes the result to
``Addresses_with_Territory.csv``. Progress is logged to ``run.log`` and to
stdout.
"""

import ast
import logging
import sys

import pandas as pd
from shapely.geometry import Point, Polygon

# --- Configure Logging ---
log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# File Handler
file_handler = logging.FileHandler('run.log')
file_handler.setFormatter(log_formatter)

# Console Handler
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(log_formatter)

# Root Logger
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
root_logger.addHandler(file_handler)
root_logger.addHandler(console_handler)


def parse_boundary_to_polygon(boundary_str):
    """Turn the text of a ``Boundary`` cell into a shapely ``Polygon``.

    Args:
        boundary_str: Python-literal text describing the polygon's coordinate
            sequence, e.g. ``"[(0, 0), (1, 0), (1, 1)]"``.

    Returns:
        The parsed ``Polygon``, or ``None`` when the cell is not a valid
        literal (including a missing/NaN cell) or the literal cannot be
        turned into a polygon.
    """
    try:
        # literal_eval only accepts Python literals, so arbitrary expressions
        # in the CSV are rejected rather than executed.
        coords = ast.literal_eval(boundary_str)
        return Polygon(coords)
    except (ValueError, SyntaxError, TypeError):
        return None


def _find_territory_id(point, territories):
    """Return the ID of the first territory whose polygon contains ``point``.

    Args:
        point: shapely ``Point`` to locate.
        territories: iterable of ``(territory_id, polygon)`` pairs, searched
            in order.

    Returns:
        The first matching territory ID, or ``'OUTSIDE_TERRITORY'`` when no
        polygon contains the point.
    """
    for territory_id, polygon in territories:
        # ``contains`` is False for points lying exactly on the boundary, so
        # such points are reported as outside.
        if polygon.contains(point):
            return territory_id
    return 'OUTSIDE_TERRITORY'


# --- Load and Prepare Territory Data ---
logging.info("Loading and preparing territory data...")
territory_file = 'TerritoryExport.csv'
territories_df = pd.read_csv(territory_file)
territories_df['Polygon'] = territories_df['Boundary'].apply(parse_boundary_to_polygon)

# Report how many rows are about to be discarded instead of dropping them
# silently; a bad export would otherwise just look like "fewer territories".
unparsable_count = int(territories_df['Polygon'].isna().sum())
if unparsable_count:
    logging.warning(
        "Skipping %d territories with unparsable boundaries.", unparsable_count
    )
territories_df.dropna(subset=['Polygon'], inplace=True)
logging.info(f"Loaded {len(territories_df)} territories.")

# --- Load and Prepare Address Data ---
logging.info("Loading address data...")
# NOTE(review): 'Addrsses.csv' looks like a misspelling of 'Addresses.csv';
# left unchanged because it must match the file on disk -- confirm.
addresses_file = 'Addrsses.csv'
addresses_df = pd.read_csv(addresses_file)
logging.info(f"Found {len(addresses_df)} addresses to process.")

# --- Process Each Address ---
logging.info("Processing addresses...")

# Build the (id, polygon) pairs once; iterating the DataFrame row-by-row for
# every address would rebuild a Series per territory per address.
territories = list(zip(territories_df['TerritoryID'], territories_df['Polygon']))
total_addresses = len(addresses_df)

territory_ids = []
coordinates = zip(addresses_df['Latitude'], addresses_df['Longitude'])
for position, (user_lat, user_lon) in enumerate(coordinates, start=1):
    # Point takes (x, y); presumably the boundaries are stored as
    # (longitude, latitude) pairs -- TODO confirm against the export.
    user_point = Point(user_lon, user_lat)
    territory_ids.append(_find_territory_id(user_point, territories))
    logging.info(" Processed address %d/%d", position, total_addresses)

logging.info("Processing complete.")

# --- Save Results to a New CSV ---
# Replace the first column wholesale on a copy rather than mutating each row
# yielded by iterrows(): those rows are dtype-coerced copies, and writing a
# string ID into a numeric row is rejected or warned about by pandas.
results_df = addresses_df.copy()
results_df[results_df.columns[0]] = territory_ids

output_file = 'Addresses_with_Territory.csv'
results_df.to_csv(output_file, index=False)
logging.info(f"Results saved to {output_file}")