#!/usr/bin/env python3
"""
Extract additional features from existing Overture Maps data
- Hospitals, clinics, pharmacies
- Government offices
- Tourist attractions
- Restaurants, hotels
"""
import logging
from pathlib import Path

import geopandas as gpd

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
DATA_DIR = Path(__file__).parent.parent / "data"
OVERTURE_DIR = DATA_DIR / "overture"
OUTPUT_DIR = DATA_DIR / "enriched"
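# NOTE: the paths above assume this script lives one level below the project
# root (e.g. in a scripts/ folder), so that <project>/data/overture/places.geojson
# already exists from an earlier Overture download step.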
def extract_healthcare():
    """Extract healthcare facilities from Overture places"""
    logger.info("Extracting healthcare facilities...")
    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)
    # Filter for healthcare; the substring match is intentionally broad,
    # so loosely related categories may also be picked up
    healthcare_categories = ['hospital', 'clinic', 'pharmacy', 'doctor', 'dentist', 'health']
    healthcare_gdf = gdf[gdf['category'].str.contains('|'.join(healthcare_categories), case=False, na=False)]
    logger.info(f"Found {len(healthcare_gdf)} healthcare facilities")
    # Save
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "healthcare_facilities.geojson"
    healthcare_gdf.to_file(output_path, driver='GeoJSON')
    return output_path, len(healthcare_gdf)
def extract_tourism():
    """Extract tourist attractions"""
    logger.info("Extracting tourist attractions...")
    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)
    # Filter for tourism
    tourism_categories = ['museum', 'monument', 'attraction', 'park', 'beach', 'viewpoint', 'zoo', 'aquarium']
    tourism_gdf = gdf[gdf['category'].str.contains('|'.join(tourism_categories), case=False, na=False)]
    logger.info(f"Found {len(tourism_gdf)} tourist attractions")
    # Save (mkdir is idempotent, so this extractor can also run on its own)
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "tourist_attractions.geojson"
    tourism_gdf.to_file(output_path, driver='GeoJSON')
    return output_path, len(tourism_gdf)
def extract_accommodation():
    """Extract hotels and accommodation"""
    logger.info("Extracting accommodation...")
    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)
    # Filter for accommodation
    accommodation_categories = ['hotel', 'hostel', 'motel', 'resort', 'lodge', 'guest_house']
    accommodation_gdf = gdf[gdf['category'].str.contains('|'.join(accommodation_categories), case=False, na=False)]
    logger.info(f"Found {len(accommodation_gdf)} accommodation facilities")
    # Save
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "accommodation.geojson"
    accommodation_gdf.to_file(output_path, driver='GeoJSON')
    return output_path, len(accommodation_gdf)
def extract_restaurants():
    """Extract restaurants and food services"""
    logger.info("Extracting restaurants...")
    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)
    # Filter for restaurants
    restaurant_categories = ['restaurant', 'cafe', 'bar', 'fast_food', 'food_court']
    restaurant_gdf = gdf[gdf['category'].str.contains('|'.join(restaurant_categories), case=False, na=False)]
    logger.info(f"Found {len(restaurant_gdf)} restaurants/cafes")
    # Save
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "restaurants.geojson"
    restaurant_gdf.to_file(output_path, driver='GeoJSON')
    return output_path, len(restaurant_gdf)
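# The module docstring also mentions government offices, but no extractor for
# them exists in this script. The sketch below mirrors the other extractors;
# the keyword list is an assumption and the function is not called from main().
def extract_government():
    """Extract government offices (sketch, not wired into main)"""
    logger.info("Extracting government offices...")
    places_path = OVERTURE_DIR / "places.geojson"
    gdf = gpd.read_file(places_path)
    # Filter for government-related categories (assumed keywords)
    government_categories = ['government', 'city_hall', 'town_hall', 'courthouse', 'post_office', 'embassy']
    government_gdf = gdf[gdf['category'].str.contains('|'.join(government_categories), case=False, na=False)]
    logger.info(f"Found {len(government_gdf)} government offices")
    # Save
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / "government_offices.geojson"
    government_gdf.to_file(output_path, driver='GeoJSON')
    return output_path, len(government_gdf)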
def main():
    logger.info("=== Extracting features from Overture data ===")
    results = []
    try:
        path, count = extract_healthcare()
        results.append({"dataset": "healthcare_facilities", "count": count})
    except Exception as e:
        logger.error(f"Failed healthcare extraction: {e}")
    try:
        path, count = extract_tourism()
        results.append({"dataset": "tourist_attractions", "count": count})
    except Exception as e:
        logger.error(f"Failed tourism extraction: {e}")
    try:
        path, count = extract_accommodation()
        results.append({"dataset": "accommodation", "count": count})
    except Exception as e:
        logger.error(f"Failed accommodation extraction: {e}")
    try:
        path, count = extract_restaurants()
        results.append({"dataset": "restaurants", "count": count})
    except Exception as e:
        logger.error(f"Failed restaurant extraction: {e}")
    logger.info("\n=== Extraction Summary ===")
    for result in results:
        logger.info(f"  {result['dataset']}: {result['count']} features")
    return results
if __name__ == "__main__":
    main()