|
|
|
|
|
""" |
|
|
Extract additional features from existing Overture Maps data |
|
|
- Hospitals, clinics, pharmacies |
|
|
- Government offices |
|
|
- Tourist attractions |
|
|
- Restaurants, hotels |
|
|
""" |
|
|
|
|
|
import geopandas as gpd |
|
|
from pathlib import Path |
|
|
import logging |
|
|
|
|
|
# Configure root logging once at import time and expose a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Directory layout: the data folder sits two levels above this file.
DATA_DIR = Path(__file__).parent.parent.joinpath("data")
OVERTURE_DIR = DATA_DIR.joinpath("overture")  # input: Overture extracts
OUTPUT_DIR = DATA_DIR.joinpath("enriched")    # output: derived datasets
|
|
|
|
|
def extract_healthcare():
    """Filter healthcare places out of the Overture places file and save them.

    Reads ``places.geojson`` from ``OVERTURE_DIR``, keeps the rows whose
    ``category`` value contains any of the healthcare keywords
    (case-insensitive regex search; rows with a missing category are
    dropped), and writes the selection to ``healthcare_facilities.geojson``
    inside ``OUTPUT_DIR``, creating that directory when necessary.

    Returns:
        tuple: ``(output_path, feature_count)``.
    """
    logger.info("Extracting healthcare facilities...")

    places = gpd.read_file(OVERTURE_DIR / "places.geojson")

    # The keywords are OR-ed together into a single regex alternation.
    keywords = ('hospital', 'clinic', 'pharmacy', 'doctor', 'dentist', 'health')
    pattern = '|'.join(keywords)
    is_healthcare = places['category'].str.contains(pattern, case=False, na=False)
    selected = places[is_healthcare]

    n_found = len(selected)
    logger.info(f"Found {n_found} healthcare facilities")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    destination = OUTPUT_DIR / "healthcare_facilities.geojson"
    selected.to_file(destination, driver='GeoJSON')

    return destination, n_found
|
|
|
|
|
def extract_tourism():
    """Extract tourist attractions from the Overture places file.

    Reads ``places.geojson`` from ``OVERTURE_DIR``, keeps the rows whose
    ``category`` value contains any of the tourism keywords
    (case-insensitive regex search; rows with a missing category are
    dropped), and writes the selection to ``tourist_attractions.geojson``
    inside ``OUTPUT_DIR``.

    Returns:
        tuple: ``(output_path, feature_count)``.
    """
    logger.info("Extracting tourist attractions...")

    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)

    tourism_categories = ['museum', 'monument', 'attraction', 'park', 'beach', 'viewpoint', 'zoo', 'aquarium']
    # NOTE(review): this is a substring search, so 'park' would also match a
    # category such as 'parking' if the data contains one - confirm intended.
    tourism_gdf = gdf[gdf['category'].str.contains('|'.join(tourism_categories), case=False, na=False)]

    logger.info(f"Found {len(tourism_gdf)} tourist attractions")

    # Create the output directory here as well, so this function does not
    # depend on another extractor having created it first.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "tourist_attractions.geojson"
    tourism_gdf.to_file(output_path, driver='GeoJSON')

    return output_path, len(tourism_gdf)
|
|
|
|
|
def extract_accommodation():
    """Extract hotels and other accommodation from the Overture places file.

    Reads ``places.geojson`` from ``OVERTURE_DIR``, keeps the rows whose
    ``category`` value contains any of the accommodation keywords
    (case-insensitive regex search; rows with a missing category are
    dropped), and writes the selection to ``accommodation.geojson`` inside
    ``OUTPUT_DIR``.

    Returns:
        tuple: ``(output_path, feature_count)``.
    """
    logger.info("Extracting accommodation...")

    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)

    accommodation_categories = ['hotel', 'hostel', 'motel', 'resort', 'lodge', 'guest_house']
    accommodation_gdf = gdf[gdf['category'].str.contains('|'.join(accommodation_categories), case=False, na=False)]

    logger.info(f"Found {len(accommodation_gdf)} accommodation facilities")

    # Create the output directory here as well, so this function does not
    # depend on another extractor having created it first.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "accommodation.geojson"
    accommodation_gdf.to_file(output_path, driver='GeoJSON')

    return output_path, len(accommodation_gdf)
|
|
|
|
|
def extract_restaurants():
    """Extract restaurants and food services from the Overture places file.

    Reads ``places.geojson`` from ``OVERTURE_DIR``, keeps the rows whose
    ``category`` value contains any of the food-service keywords
    (case-insensitive regex search; rows with a missing category are
    dropped), and writes the selection to ``restaurants.geojson`` inside
    ``OUTPUT_DIR``.

    Returns:
        tuple: ``(output_path, feature_count)``.
    """
    logger.info("Extracting restaurants...")

    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)

    restaurant_categories = ['restaurant', 'cafe', 'bar', 'fast_food', 'food_court']
    # NOTE(review): this is a substring search, so 'bar' would also match a
    # category such as 'barber' if the data contains one - confirm intended.
    restaurant_gdf = gdf[gdf['category'].str.contains('|'.join(restaurant_categories), case=False, na=False)]

    logger.info(f"Found {len(restaurant_gdf)} restaurants/cafes")

    # Create the output directory here as well, so this function does not
    # depend on another extractor having created it first.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "restaurants.geojson"
    restaurant_gdf.to_file(output_path, driver='GeoJSON')

    return output_path, len(restaurant_gdf)
|
|
|
|
|
def main():
    """Run every extraction step and log a summary of the feature counts.

    The extractors run independently of one another: when one raises, the
    failure is logged together with its traceback and the remaining
    extractors still run.

    Returns:
        list[dict]: one ``{"dataset": name, "count": n}`` entry per
        extraction that succeeded, in execution order.
    """
    logger.info("=== Extracting features from Overture data ===")

    # (label used in the failure message, dataset name in the summary, extractor)
    steps = (
        ("healthcare", "healthcare_facilities", extract_healthcare),
        ("tourism", "tourist_attractions", extract_tourism),
        ("accommodation", "accommodation", extract_accommodation),
        ("restaurant", "restaurants", extract_restaurants),
    )

    results = []
    for label, dataset, extractor in steps:
        try:
            # Only the count is reported; the output path is not needed here.
            _, count = extractor()
        except Exception as exc:
            # Top-level boundary: keep going with the other datasets, but
            # record the traceback so the failure can be diagnosed.
            logger.exception("Failed %s extraction: %s", label, exc)
        else:
            results.append({"dataset": dataset, "count": count})

    logger.info("\n=== Extraction Summary ===")
    for result in results:
        logger.info(f" {result['dataset']}: {result['count']} features")

    return results
|
|
|
|
|
# Run the extraction pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|