GeoQuery / backend /scripts /extract_overture_features.py
GerardCB's picture
Deploy to Spaces (Final Clean)
4851501
#!/usr/bin/env python3
"""
Extract additional features from existing Overture Maps data
- Hospitals, clinics, pharmacies
- Government offices (planned; no extractor for these is implemented in this file yet)
- Tourist attractions
- Restaurants, hotels
"""
import geopandas as gpd
from pathlib import Path
import logging
# Configure root logging at import time so INFO-level progress messages are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# The data directory is resolved relative to this file: <this file's parent's parent>/data.
DATA_DIR = Path(__file__).parent.parent / "data"
# Input directory; the extractors read "places.geojson" from here.
OVERTURE_DIR = DATA_DIR / "overture"
# Output directory for the filtered GeoJSON files written by the extractors.
OUTPUT_DIR = DATA_DIR / "enriched"
def extract_healthcare():
    """Write the healthcare-related places to their own GeoJSON file.

    Reads ``places.geojson`` from ``OVERTURE_DIR``, keeps the rows whose
    ``category`` value contains any of the healthcare keywords
    (case-insensitive; rows with a missing category are dropped), and saves
    them as ``healthcare_facilities.geojson`` in ``OUTPUT_DIR``, creating
    that directory if needed.

    Returns:
        tuple: ``(output_path, feature_count)`` for the file that was written.
    """
    logger.info("Extracting healthcare facilities...")

    places = gpd.read_file(OVERTURE_DIR / "places.geojson")

    # The keywords are OR-ed into a single pattern for str.contains.
    keywords = ['hospital', 'clinic', 'pharmacy', 'doctor', 'dentist', 'health']
    pattern = '|'.join(keywords)
    is_healthcare = places['category'].str.contains(pattern, case=False, na=False)
    selected = places[is_healthcare]

    n_selected = len(selected)
    logger.info(f"Found {n_selected} healthcare facilities")

    # Make sure the destination directory exists before writing.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / "healthcare_facilities.geojson"
    selected.to_file(destination, driver='GeoJSON')

    return destination, n_selected
def extract_tourism():
    """Extract tourist attractions into their own GeoJSON file.

    Reads ``places.geojson`` from ``OVERTURE_DIR``, keeps the rows whose
    ``category`` value contains any of the tourism keywords
    (case-insensitive; rows with a missing category are dropped), and saves
    them as ``tourist_attractions.geojson`` in ``OUTPUT_DIR``.

    Returns:
        tuple: ``(output_path, feature_count)`` for the file that was written.
    """
    logger.info("Extracting tourist attractions...")

    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)

    # Filter for tourism.
    # NOTE(review): str.contains does substring matching, so a keyword such as
    # 'park' would also select a category like 'parking' if one exists in the
    # data -- confirm whether that is intended.
    tourism_categories = ['museum', 'monument', 'attraction', 'park', 'beach', 'viewpoint', 'zoo', 'aquarium']
    tourism_gdf = gdf[gdf['category'].str.contains('|'.join(tourism_categories), case=False, na=False)]

    logger.info(f"Found {len(tourism_gdf)} tourist attractions")

    # Save. Create the output directory here so this function also works when
    # it is called on its own, not only after extract_healthcare() has run.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "tourist_attractions.geojson"
    tourism_gdf.to_file(output_path, driver='GeoJSON')

    return output_path, len(tourism_gdf)
def extract_accommodation():
    """Extract hotels and other accommodation into their own GeoJSON file.

    Reads ``places.geojson`` from ``OVERTURE_DIR``, keeps the rows whose
    ``category`` value contains any of the accommodation keywords
    (case-insensitive; rows with a missing category are dropped), and saves
    them as ``accommodation.geojson`` in ``OUTPUT_DIR``.

    Returns:
        tuple: ``(output_path, feature_count)`` for the file that was written.
    """
    logger.info("Extracting accommodation...")

    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)

    # Filter for accommodation: the keywords are OR-ed into one pattern.
    accommodation_categories = ['hotel', 'hostel', 'motel', 'resort', 'lodge', 'guest_house']
    accommodation_gdf = gdf[gdf['category'].str.contains('|'.join(accommodation_categories), case=False, na=False)]

    logger.info(f"Found {len(accommodation_gdf)} accommodation facilities")

    # Save. Create the output directory here so this function also works when
    # it is called on its own, not only after extract_healthcare() has run.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "accommodation.geojson"
    accommodation_gdf.to_file(output_path, driver='GeoJSON')

    return output_path, len(accommodation_gdf)
def extract_restaurants():
    """Extract restaurants and food services into their own GeoJSON file.

    Reads ``places.geojson`` from ``OVERTURE_DIR``, keeps the rows whose
    ``category`` value contains any of the food-service keywords
    (case-insensitive; rows with a missing category are dropped), and saves
    them as ``restaurants.geojson`` in ``OUTPUT_DIR``.

    Returns:
        tuple: ``(output_path, feature_count)`` for the file that was written.
    """
    logger.info("Extracting restaurants...")

    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)

    # Filter for restaurants.
    # NOTE(review): str.contains does substring matching, so a keyword such as
    # 'bar' would also select a category like 'barber' if one exists in the
    # data -- confirm whether that is intended.
    restaurant_categories = ['restaurant', 'cafe', 'bar', 'fast_food', 'food_court']
    restaurant_gdf = gdf[gdf['category'].str.contains('|'.join(restaurant_categories), case=False, na=False)]

    logger.info(f"Found {len(restaurant_gdf)} restaurants/cafes")

    # Save. Create the output directory here so this function also works when
    # it is called on its own, not only after extract_healthcare() has run.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "restaurants.geojson"
    restaurant_gdf.to_file(output_path, driver='GeoJSON')

    return output_path, len(restaurant_gdf)
def main():
    """Run every extractor and log a summary of the feature counts.

    The extractors run independently of each other: when one raises, the
    failure is logged (message plus traceback) and the remaining extractors
    still run.

    Returns:
        list[dict]: one ``{"dataset": name, "count": n}`` entry for each
        extractor that succeeded, in execution order.
    """
    logger.info("=== Extracting features from Overture data ===")

    # (label used in the failure message, dataset name reported, extractor)
    extractors = [
        ("healthcare", "healthcare_facilities", extract_healthcare),
        ("tourism", "tourist_attractions", extract_tourism),
        ("accommodation", "accommodation", extract_accommodation),
        ("restaurant", "restaurants", extract_restaurants),
    ]

    results = []
    for label, dataset, extractor in extractors:
        try:
            # The returned output path is not needed for the summary.
            _, count = extractor()
        except Exception as e:
            # Top-level boundary: keep going with the other datasets, but
            # record the traceback so the failure can be diagnosed.
            logger.error(f"Failed {label} extraction: {e}", exc_info=True)
        else:
            results.append({"dataset": dataset, "count": count})

    logger.info("\n=== Extraction Summary ===")
    for result in results:
        logger.info(f" {result['dataset']}: {result['count']} features")

    return results
# Run the extraction only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()