#!/usr/bin/env python3 """ Extract additional features from existing Overture Maps data - Hospitals, clinics, pharmacies - Government offices - Tourist attractions - Restaurants, hotels """ import geopandas as gpd from pathlib import Path import logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) DATA_DIR = Path(__file__).parent.parent / "data" OVERTURE_DIR = DATA_DIR / "overture" OUTPUT_DIR = DATA_DIR / "enriched" def extract_healthcare(): """Extract healthcare facilities from Overture places""" logger.info("Extracting healthcare facilities...") places_path = OVERTURE_DIR / "places.geojson" gdf = gpd.read_file(places_path) # Filter for healthcare healthcare_categories = ['hospital', 'clinic', 'pharmacy', 'doctor', 'dentist', 'health'] healthcare_gdf = gdf[gdf['category'].str.contains('|'.join(healthcare_categories), case=False, na=False)] logger.info(f"Found {len(healthcare_gdf)} healthcare facilities") # Save OUTPUT_DIR.mkdir(parents=True, exist_ok=True) output_path = OUTPUT_DIR / "healthcare_facilities.geojson" healthcare_gdf.to_file(output_path, driver='GeoJSON') return output_path, len(healthcare_gdf) def extract_tourism(): """Extract tourist attractions""" logger.info("Extracting tourist attractions...") places_path = OVERTURE_DIR / "places.geojson" gdf = gpd.read_file(places_path) # Filter for tourism tourism_categories = ['museum', 'monument', 'attraction', 'park', 'beach', 'viewpoint', 'zoo', 'aquarium'] tourism_gdf = gdf[gdf['category'].str.contains('|'.join(tourism_categories), case=False, na=False)] logger.info(f"Found {len(tourism_gdf)} tourist attractions") # Save output_path = OUTPUT_DIR / "tourist_attractions.geojson" tourism_gdf.to_file(output_path, driver='GeoJSON') return output_path, len(tourism_gdf) def extract_accommodation(): """Extract hotels and accommodation""" logger.info("Extracting accommodation...") places_path = OVERTURE_DIR / "places.geojson" gdf = gpd.read_file(places_path) # Filter for accommodation accommodation_categories = ['hotel', 'hostel', 'motel', 'resort', 'lodge', 'guest_house'] accommodation_gdf = gdf[gdf['category'].str.contains('|'.join(accommodation_categories), case=False, na=False)] logger.info(f"Found {len(accommodation_gdf)} accommodation facilities") # Save output_path = OUTPUT_DIR / "accommodation.geojson" accommodation_gdf.to_file(output_path, driver='GeoJSON') return output_path, len(accommodation_gdf) def extract_restaurants(): """Extract restaurants and food services""" logger.info("Extracting restaurants...") places_path = OVERTURE_DIR / "places.geojson" gdf = gpd.read_file(places_path) # Filter for restaurants restaurant_categories = ['restaurant', 'cafe', 'bar', 'fast_food', 'food_court'] restaurant_gdf = gdf[gdf['category'].str.contains('|'.join(restaurant_categories), case=False, na=False)] logger.info(f"Found {len(restaurant_gdf)} restaurants/cafes") # Save output_path = OUTPUT_DIR / "restaurants.geojson" restaurant_gdf.to_file(output_path, driver='GeoJSON') return output_path, len(restaurant_gdf) def main(): logger.info("=== Extracting features from Overture data ===") results = [] try: path, count = extract_healthcare() results.append({"dataset": "healthcare_facilities", "count": count}) except Exception as e: logger.error(f"Failed healthcare extraction: {e}") try: path, count = extract_tourism() results.append({"dataset": "tourist_attractions", "count": count}) except Exception as e: logger.error(f"Failed tourism extraction: {e}") try: path, count = extract_accommodation() results.append({"dataset": "accommodation", "count": count}) except Exception as e: logger.error(f"Failed accommodation extraction: {e}") try: path, count = extract_restaurants() results.append({"dataset": "restaurants", "count": count}) except Exception as e: logger.error(f"Failed restaurant extraction: {e}") logger.info("\n=== Extraction Summary ===") for result in results: logger.info(f" {result['dataset']}: {result['count']} features") return results if __name__ == "__main__": main()