Spaces:

GerardCB
/

GeoQuery

Running

File size: 4,529 Bytes
#!/usr/bin/env python3
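"""
Extract additional feature layers from existing Overture Maps places data.

Layers produced by this script:
- Healthcare facilities (hospitals, clinics, pharmacies, ...)
- Tourist attractions
- Accommodation (hotels, hostels, ...)
- Restaurants and other food services

Government offices are a planned layer, but no extractor for them is
implemented in this script yet.
"""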

import geopandas as gpd
from pathlib import Path
import logging

# Configure root logging at INFO so the progress messages below are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Data directory: a "data" folder located two levels above this file
# (i.e. next to this file's parent directory).
DATA_DIR = Path(__file__).parent.parent / "data"
# Input: directory the extractors read "places.geojson" from.
OVERTURE_DIR = DATA_DIR / "overture"
# Output: directory the extracted GeoJSON layers are written to.
OUTPUT_DIR = DATA_DIR / "enriched"

def extract_healthcare():
    """Write the healthcare subset of the Overture places layer to GeoJSON.

    Reads ``places.geojson`` from ``OVERTURE_DIR`` and keeps every row whose
    ``category`` value contains one of the healthcare keywords
    (case-insensitive; rows with a missing category are dropped). The result
    is saved as ``healthcare_facilities.geojson`` in ``OUTPUT_DIR``, which is
    created if it does not exist.

    Returns:
        A ``(output_path, feature_count)`` tuple.
    """
    logger.info("Extracting healthcare facilities...")

    # Keywords are OR-ed into a single pattern for one pass over the column.
    keywords = ['hospital', 'clinic', 'pharmacy', 'doctor', 'dentist', 'health']
    pattern = '|'.join(keywords)

    places = gpd.read_file(OVERTURE_DIR / "places.geojson")
    is_healthcare = places['category'].str.contains(pattern, case=False, na=False)
    selected = places[is_healthcare]
    count = len(selected)

    logger.info(f"Found {count} healthcare facilities")

    # Make sure the destination directory exists before writing.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / "healthcare_facilities.geojson"
    selected.to_file(destination, driver='GeoJSON')

    return destination, count

def extract_tourism():
    """Extract tourist attractions from the Overture places layer.

    Reads ``places.geojson`` from ``OVERTURE_DIR`` and keeps every row whose
    ``category`` value contains one of the tourism keywords
    (case-insensitive; rows with a missing category are dropped). The result
    is saved as ``tourist_attractions.geojson`` in ``OUTPUT_DIR``.

    Returns:
        A ``(output_path, feature_count)`` tuple.
    """
    logger.info("Extracting tourist attractions...")

    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)

    # Filter for tourism.
    # NOTE(review): this is a substring match, so e.g. 'park' would also
    # match a category such as 'parking' if the data contains one - confirm
    # whether that is intended.
    tourism_categories = ['museum', 'monument', 'attraction', 'park', 'beach', 'viewpoint', 'zoo', 'aquarium']
    tourism_gdf = gdf[gdf['category'].str.contains('|'.join(tourism_categories), case=False, na=False)]

    logger.info(f"Found {len(tourism_gdf)} tourist attractions")

    # Save. Create the output directory here as well so this extractor works
    # when called on its own or when an earlier extractor failed before
    # creating it.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "tourist_attractions.geojson"
    tourism_gdf.to_file(output_path, driver='GeoJSON')

    return output_path, len(tourism_gdf)

def extract_accommodation():
    """Extract hotels and other accommodation from the Overture places layer.

    Reads ``places.geojson`` from ``OVERTURE_DIR`` and keeps every row whose
    ``category`` value contains one of the accommodation keywords
    (case-insensitive; rows with a missing category are dropped). The result
    is saved as ``accommodation.geojson`` in ``OUTPUT_DIR``.

    Returns:
        A ``(output_path, feature_count)`` tuple.
    """
    logger.info("Extracting accommodation...")

    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)

    # Filter for accommodation (substring match on the category value).
    accommodation_categories = ['hotel', 'hostel', 'motel', 'resort', 'lodge', 'guest_house']
    accommodation_gdf = gdf[gdf['category'].str.contains('|'.join(accommodation_categories), case=False, na=False)]

    logger.info(f"Found {len(accommodation_gdf)} accommodation facilities")

    # Save. Create the output directory here as well so this extractor works
    # when called on its own or when an earlier extractor failed before
    # creating it.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "accommodation.geojson"
    accommodation_gdf.to_file(output_path, driver='GeoJSON')

    return output_path, len(accommodation_gdf)

def extract_restaurants():
    """Extract restaurants and food services from the Overture places layer.

    Reads ``places.geojson`` from ``OVERTURE_DIR`` and keeps every row whose
    ``category`` value contains one of the food-service keywords
    (case-insensitive; rows with a missing category are dropped). The result
    is saved as ``restaurants.geojson`` in ``OUTPUT_DIR``.

    Returns:
        A ``(output_path, feature_count)`` tuple.
    """
    logger.info("Extracting restaurants...")

    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)

    # Filter for restaurants.
    # NOTE(review): this is a substring match, so e.g. 'bar' would also match
    # a category such as 'barber' if the data contains one - confirm whether
    # that is intended.
    restaurant_categories = ['restaurant', 'cafe', 'bar', 'fast_food', 'food_court']
    restaurant_gdf = gdf[gdf['category'].str.contains('|'.join(restaurant_categories), case=False, na=False)]

    logger.info(f"Found {len(restaurant_gdf)} restaurants/cafes")

    # Save. Create the output directory here as well so this extractor works
    # when called on its own or when an earlier extractor failed before
    # creating it.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "restaurants.geojson"
    restaurant_gdf.to_file(output_path, driver='GeoJSON')

    return output_path, len(restaurant_gdf)

def main():
    """Run every extractor in turn and log a summary of what was produced.

    Each extractor is attempted independently: if one raises, the error is
    logged and the remaining extractors still run. Only extractors that
    completed contribute an entry to the result.

    Returns:
        A list of ``{"dataset": name, "count": n}`` dicts, one per successful
        extraction, in the order the extractors ran.
    """
    logger.info("=== Extracting features from Overture data ===")

    # (extractor, dataset name reported in the summary, failure log prefix)
    steps = (
        (extract_healthcare, "healthcare_facilities", "Failed healthcare extraction"),
        (extract_tourism, "tourist_attractions", "Failed tourism extraction"),
        (extract_accommodation, "accommodation", "Failed accommodation extraction"),
        (extract_restaurants, "restaurants", "Failed restaurant extraction"),
    )

    results = []
    for extractor, dataset, failure_prefix in steps:
        try:
            # The output path is not needed here; only the count is reported.
            _, count = extractor()
            results.append({"dataset": dataset, "count": count})
        except Exception as e:
            logger.error(f"{failure_prefix}: {e}")

    logger.info("\n=== Extraction Summary ===")
    for entry in results:
        logger.info(f"  {entry['dataset']}: {entry['count']} features")

    return results

# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()