Cleanup: Remove unused download scripts
- backend/scripts/download_geofabrik.py +0 -192
- backend/scripts/download_global_datasets.py +0 -133
- backend/scripts/download_hdx.py +0 -72
- backend/scripts/download_kontur.py +0 -239
- backend/scripts/download_overture.py +0 -133
- backend/scripts/download_stri_data.py +0 -79
- backend/scripts/download_worldbank.py +0 -141
backend/scripts/download_geofabrik.py
DELETED
@@ -1,192 +0,0 @@
"""
Panama Data Ingestion - Phase A: OpenStreetMap via Geofabrik

Downloads pre-packaged OSM data for Panama as shapefiles and converts to GeoJSON.
Data source: https://download.geofabrik.de/central-america.html
"""

import os
import sys
import zipfile
import requests
import subprocess
from pathlib import Path

# Panama Geofabrik URL
GEOFABRIK_URL = "https://download.geofabrik.de/central-america/panama-latest-free.shp.zip"

# Output directories
DATA_DIR = Path(__file__).parent.parent / "data"
OSM_DIR = DATA_DIR / "osm"
TEMP_DIR = DATA_DIR / "temp"

# OSM layers to extract
OSM_LAYERS = [
    ("gis_osm_roads_free_1", "roads", "Road network with classification"),
    ("gis_osm_pois_free_1", "pois", "Points of interest (restaurants, shops, etc.)"),
    ("gis_osm_pois_a_free_1", "pois_areas", "POI areas (larger venues)"),
    ("gis_osm_buildings_a_free_1", "buildings", "Building footprints"),
    ("gis_osm_landuse_a_free_1", "landuse", "Land use zones (residential, commercial, etc.)"),
    ("gis_osm_natural_free_1", "natural_points", "Natural features (trees, peaks)"),
    ("gis_osm_natural_a_free_1", "natural_areas", "Natural areas (forests, parks)"),
    ("gis_osm_water_a_free_1", "water_areas", "Water bodies (lakes, reservoirs)"),
    ("gis_osm_waterways_free_1", "waterways", "Rivers and streams"),
    ("gis_osm_railways_free_1", "railways", "Railway lines"),
    ("gis_osm_traffic_free_1", "traffic", "Traffic infrastructure (signals, crossings)"),
    ("gis_osm_traffic_a_free_1", "traffic_areas", "Traffic areas (parking lots)"),
    ("gis_osm_transport_free_1", "transport", "Transport points (bus stops, stations)"),
    ("gis_osm_transport_a_free_1", "transport_areas", "Transport areas (airports, ports)"),
    ("gis_osm_places_free_1", "places", "Place names (cities, towns, villages)"),
    ("gis_osm_places_a_free_1", "places_areas", "Place areas"),
    ("gis_osm_pofw_free_1", "places_of_worship", "Places of worship"),
    ("gis_osm_pofw_a_free_1", "places_of_worship_areas", "Places of worship (buildings)"),
]


def download_file(url: str, dest: Path) -> bool:
    """Download a file with progress indication."""
    print(f"📥 Downloading {url}...")

    try:
        response = requests.get(url, stream=True)
        response.raise_for_status()

        total_size = int(response.headers.get('content-length', 0))
        downloaded = 0

        with open(dest, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                downloaded += len(chunk)
                if total_size > 0:
                    pct = (downloaded / total_size) * 100
                    print(f"\r   Progress: {pct:.1f}% ({downloaded // 1024 // 1024}MB)", end="")

        print(f"\n✅ Downloaded to {dest}")
        return True

    except Exception as e:
        print(f"❌ Download failed: {e}")
        return False


def convert_shp_to_geojson(shp_path: Path, geojson_path: Path) -> bool:
    """Convert shapefile to GeoJSON using ogr2ogr."""
    try:
        cmd = [
            "ogr2ogr",
            "-f", "GeoJSON",
            "-t_srs", "EPSG:4326",  # Ensure WGS84
            str(geojson_path),
            str(shp_path)
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode == 0:
            return True
        else:
            print(f"   ogr2ogr error: {result.stderr}")
            return False

    except FileNotFoundError:
        print("⚠️  ogr2ogr not found. Please install GDAL:")
        print("   brew install gdal      # macOS")
        print("   apt install gdal-bin   # Ubuntu")
        return False


def extract_and_convert():
    """Extract shapefiles from zip and convert to GeoJSON."""

    # Ensure directories exist
    OSM_DIR.mkdir(parents=True, exist_ok=True)
    TEMP_DIR.mkdir(parents=True, exist_ok=True)

    zip_path = TEMP_DIR / "panama-osm.zip"

    # Download if not exists
    if not zip_path.exists():
        if not download_file(GEOFABRIK_URL, zip_path):
            return False
    else:
        print(f"📦 Using cached {zip_path}")

    # Extract
    print(f"📂 Extracting to {TEMP_DIR}...")
    with zipfile.ZipFile(zip_path, 'r') as zf:
        zf.extractall(TEMP_DIR)

    # Convert each layer
    converted = 0
    for shp_name, output_name, description in OSM_LAYERS:
        shp_path = TEMP_DIR / f"{shp_name}.shp"
        geojson_path = OSM_DIR / f"{output_name}.geojson"

        if not shp_path.exists():
            print(f"⏭️  Skipping {shp_name} (not in download)")
            continue

        print(f"🔄 Converting {shp_name} → {output_name}.geojson...")

        if convert_shp_to_geojson(shp_path, geojson_path):
            # Get file size
            size_mb = geojson_path.stat().st_size / 1024 / 1024
            print(f"   ✅ Created {geojson_path.name} ({size_mb:.1f}MB)")
            converted += 1
        else:
            print(f"   ❌ Failed to convert {shp_name}")

    print(f"\n🎉 Converted {converted}/{len(OSM_LAYERS)} OSM layers")
    return converted > 0


def register_in_catalog():
    """Register OSM datasets in the catalog."""
    import json

    catalog_path = DATA_DIR / "catalog.json"

    if catalog_path.exists():
        with open(catalog_path) as f:
            catalog = json.load(f)
    else:
        catalog = {}

    for shp_name, output_name, description in OSM_LAYERS:
        geojson_path = OSM_DIR / f"{output_name}.geojson"

        if not geojson_path.exists():
            continue

        # Create catalog entry
        table_name = f"osm_{output_name}"
        rel_path = f"osm/{output_name}.geojson"

        catalog[table_name] = {
            "source_file": rel_path,
            "source_type": "geojson",
            "description": f"OpenStreetMap {description} for Panama",
            "tags": ["osm", "panama", output_name.replace("_", " ")],
            "data_type": "vector",
            "geometry_type": "auto"  # Will be detected on load
        }

        print(f"📝 Registered {table_name}")

    with open(catalog_path, 'w') as f:
        json.dump(catalog, f, indent=2)

    print(f"✅ Updated catalog with OSM datasets")


if __name__ == "__main__":
    print("=" * 60)
    print("🗺️  Panama OSM Data Ingestion (Geofabrik)")
    print("=" * 60)

    if extract_and_convert():
        register_in_catalog()
        print("\n🚀 OSM data ready! Restart the backend to load new datasets.")
    else:
        print("\n❌ Ingestion failed")
        sys.exit(1)
backend/scripts/download_global_datasets.py
DELETED
@@ -1,133 +0,0 @@
#!/usr/bin/env python3
"""
Download global geo-referenced datasets for Panama
- OurAirports: Global airport database
- WRI Global Power Plant Database
- Other infrastructure datasets
"""

import requests
import pandas as pd
import geopandas as gpd
from pathlib import Path
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

DATA_DIR = Path(__file__).parent.parent / "data" / "global"

# Dataset URLs
DATASETS = {
    "airports": {
        "url": "https://davidmegginson.github.io/ourairports-data/airports.csv",
        "description": "OurAirports - Global airport database"
    },
    "power_plants": {
        "url": "https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3/global_power_plant_database.csv",
        "description": "WRI Global Power Plant Database v1.3"
    }
}

def download_airports():
    """Download and process OurAirports data for Panama"""
    logger.info("Downloading OurAirports global database...")

    url = DATASETS["airports"]["url"]
    response = requests.get(url)
    response.raise_for_status()

    # Save raw CSV
    output_dir = DATA_DIR / "airports"
    output_dir.mkdir(parents=True, exist_ok=True)

    csv_path = output_dir / "airports_global.csv"
    with open(csv_path, 'wb') as f:
        f.write(response.content)

    logger.info(f"Saved raw airports data: {csv_path}")

    # Filter for Panama (iso_country = PA)
    df = pd.read_csv(csv_path)
    panama_df = df[df['iso_country'] == 'PA'].copy()

    logger.info(f"Found {len(panama_df)} airports in Panama")

    # Convert to GeoDataFrame
    gdf = gpd.GeoDataFrame(
        panama_df,
        geometry=gpd.points_from_xy(panama_df.longitude_deg, panama_df.latitude_deg),
        crs="EPSG:4326"
    )

    # Save as GeoJSON
    geojson_path = output_dir / "panama_airports.geojson"
    gdf.to_file(geojson_path, driver='GeoJSON')

    logger.info(f"Created GeoJSON: {geojson_path}")
    return geojson_path, len(gdf)

def download_power_plants():
    """Download and process WRI Global Power Plant Database for Panama"""
    logger.info("Downloading WRI Global Power Plant Database...")

    url = DATASETS["power_plants"]["url"]
    response = requests.get(url)
    response.raise_for_status()

    # Save raw CSV
    output_dir = DATA_DIR / "power_plants"
    output_dir.mkdir(parents=True, exist_ok=True)

    csv_path = output_dir / "power_plants_global.csv"
    with open(csv_path, 'wb') as f:
        f.write(response.content)

    logger.info(f"Saved raw power plants data: {csv_path}")

    # Filter for Panama (country = PAN)
    df = pd.read_csv(csv_path)
    panama_df = df[df['country'] == 'PAN'].copy()

    logger.info(f"Found {len(panama_df)} power plants in Panama")

    # Convert to GeoDataFrame
    gdf = gpd.GeoDataFrame(
        panama_df,
        geometry=gpd.points_from_xy(panama_df.longitude, panama_df.latitude),
        crs="EPSG:4326"
    )

    # Save as GeoJSON
    geojson_path = output_dir / "panama_power_plants.geojson"
    gdf.to_file(geojson_path, driver='GeoJSON')

    logger.info(f"Created GeoJSON: {geojson_path}")
    return geojson_path, len(gdf)

def main():
    logger.info("=== Global Dataset Download Starting ===")

    results = []

    try:
        airports_path, airports_count = download_airports()
        results.append({"dataset": "airports", "count": airports_count, "path": airports_path})
    except Exception as e:
        logger.error(f"Failed to download airports: {e}")

    try:
        power_path, power_count = download_power_plants()
        results.append({"dataset": "power_plants", "count": power_count, "path": power_path})
    except Exception as e:
        logger.error(f"Failed to download power plants: {e}")

    logger.info("\n=== Download Summary ===")
    for result in results:
        logger.info(f"  {result['dataset']}: {result['count']} features")

    logger.info("\n=== Complete ===")
    return results

if __name__ == "__main__":
    main()
backend/scripts/download_hdx.py
DELETED
@@ -1,72 +0,0 @@
#!/usr/bin/env python3
"""
HDX Data Downloader for Panama
Downloads official datasets from Humanitarian Data Exchange
"""

import requests
from pathlib import Path
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# HDX Dataset URLs (from research)
HDX_DATASETS = {
    "health": {
        "name": "Panama - Health Indicators",
        "url": "https://data.humdata.org/dataset/4d3f9ab7-8e5c-4a24-ae5d-cfc3e81b4db6",
        "description": "WHO health indicators for Panama"
    },
    "education": {
        "name": "Panama - Education",
        "url": "https://data.humdata.org/dataset/panama-education-statistics",
        "description": "UNESCO/World Bank education statistics"
    },
    "economy": {
        "name": "Panama - Economy and Growth",
        "url": "https://data.humdata.org/dataset/panama-economy-indicators",
        "description": "World Bank economic indicators"
    }
}

DATA_DIR = Path(__file__).parent.parent / "data" / "hdx"

def download_hdx_dataset(dataset_key: str):
    """Download a dataset from HDX"""
    dataset = HDX_DATASETS[dataset_key]
    logger.info(f"Downloading {dataset['name']}...")

    # Create output directory
    output_dir = DATA_DIR / dataset_key
    output_dir.mkdir(parents=True, exist_ok=True)

    try:
        # HDX datasets typically have resource download URLs
        # We'll need to parse the dataset page to get the actual download link
        response = requests.get(dataset['url'])
        response.raise_for_status()

        # Note: This is a placeholder - actual implementation would need to:
        # 1. Parse the HDX page HTML to find CSV/Excel download links
        # 2. Download each resource file
        # 3. Save to output_dir

        logger.info(f"Downloaded to {output_dir}")
        return output_dir

    except Exception as e:
        logger.error(f"Failed to download {dataset['name']}: {e}")
        return None

def main():
    """Download all HDX datasets"""
    logger.info("Starting HDX data download...")

    for key in HDX_DATASETS.keys():
        download_hdx_dataset(key)

    logger.info("Download complete!")

if __name__ == "__main__":
    main()
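
Note: this script never got past the placeholder, it only fetched the dataset landing page and never resolved the downloadable files. The removed download_kontur.py below shows the working pattern, HDX's CKAN package_show API. A minimal sketch of applying that here (the dataset slugs listed above were unverified) would be:

import requests

def list_hdx_resources(dataset_id: str) -> list:
    """Query HDX's CKAN API for a dataset's downloadable resources.
    dataset_id is the slug from the dataset URL, e.g. "kontur-population-panama"."""
    api_url = "https://data.humdata.org/api/3/action/package_show"
    response = requests.get(api_url, params={"id": dataset_id}, timeout=60)
    response.raise_for_status()
    payload = response.json()
    if not payload.get("success"):
        return []
    # Each resource carries a name, a format and a direct download URL.
    return [
        {"name": r.get("name"), "format": r.get("format"), "url": r.get("url")}
        for r in payload.get("result", {}).get("resources", [])
    ]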
backend/scripts/download_kontur.py
DELETED
@@ -1,239 +0,0 @@
"""
Panama Data Ingestion - Phase A: Kontur Population

Downloads population density data from HDX (Humanitarian Data Exchange).
Data source: https://data.humdata.org/dataset/kontur-population-panama
"""

import os
import sys
import json
import requests
import gzip
import shutil
from pathlib import Path

# HDX API for Kontur Population Panama
HDX_DATASET_URL = "https://data.humdata.org/api/3/action/package_show?id=kontur-population-panama"

# Output directories
DATA_DIR = Path(__file__).parent.parent / "data"
KONTUR_DIR = DATA_DIR / "kontur"
TEMP_DIR = DATA_DIR / "temp"


def get_download_url() -> str:
    """Fetch the actual download URL from HDX API."""
    print("🔍 Fetching download URL from HDX...")

    try:
        response = requests.get(HDX_DATASET_URL)
        response.raise_for_status()
        data = response.json()

        if not data.get("success"):
            print("❌ HDX API returned error")
            return None

        resources = data.get("result", {}).get("resources", [])

        # Look for GeoJSON or GPKG file
        for resource in resources:
            name = resource.get("name", "").lower()
            url = resource.get("url", "")

            if "geojson" in name or "gpkg" in name:
                print(f"   Found: {resource.get('name')}")
                return url

        # Fallback to first resource
        if resources:
            return resources[0].get("url")

        return None

    except Exception as e:
        print(f"❌ Failed to fetch HDX metadata: {e}")
        return None


def download_file(url: str, dest: Path) -> bool:
    """Download a file with progress indication."""
    print(f"📥 Downloading from {url[:80]}...")

    try:
        response = requests.get(url, stream=True)
        response.raise_for_status()

        total_size = int(response.headers.get('content-length', 0))
        downloaded = 0

        with open(dest, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                downloaded += len(chunk)
                if total_size > 0:
                    pct = (downloaded / total_size) * 100
                    print(f"\r   Progress: {pct:.1f}% ({downloaded // 1024}KB)", end="")

        print(f"\n✅ Downloaded to {dest}")
        return True

    except Exception as e:
        print(f"❌ Download failed: {e}")
        return False


def decompress_if_needed(file_path: Path) -> Path:
    """Decompress .gz file if needed."""
    if file_path.suffix == '.gz':
        output_path = file_path.with_suffix('')
        print(f"📦 Decompressing {file_path.name}...")

        with gzip.open(file_path, 'rb') as f_in:
            with open(output_path, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

        return output_path

    return file_path


def download_population_data():
    """Download Kontur Population data for Panama."""

    # Ensure directories exist
    KONTUR_DIR.mkdir(parents=True, exist_ok=True)
    TEMP_DIR.mkdir(parents=True, exist_ok=True)

    # Get download URL
    download_url = get_download_url()

    if not download_url:
        # Fallback to known URL pattern
        download_url = "https://geodata-eu-central-1-kontur-public.s3.amazonaws.com/kontur_datasets/kontur_population_PA_20231101.gpkg.gz"
        print(f"⚠️  Using fallback URL: {download_url}")

    # Determine filename
    filename = download_url.split("/")[-1]
    temp_path = TEMP_DIR / filename

    # Download
    if not temp_path.exists():
        if not download_file(download_url, temp_path):
            return None
    else:
        print(f"📦 Using cached {temp_path}")

    # Decompress if needed
    data_path = decompress_if_needed(temp_path)

    # Move to final location
    final_path = KONTUR_DIR / data_path.name
    if data_path != final_path:
        shutil.move(str(data_path), str(final_path))

    print(f"✅ Population data ready at {final_path}")
    return final_path


def convert_gpkg_to_geojson(gpkg_path: Path) -> Path:
    """Convert GeoPackage to GeoJSON using ogr2ogr."""
    import subprocess

    geojson_path = gpkg_path.with_suffix('.geojson')

    print(f"🔄 Converting to GeoJSON...")

    try:
        # First, list layers in the GPKG
        result = subprocess.run(
            ["ogrinfo", "-so", str(gpkg_path)],
            capture_output=True, text=True
        )

        # Get the first layer name
        layer_name = None
        for line in result.stdout.split('\n'):
            if ': ' in line and 'using driver' not in line.lower():
                parts = line.split(':')
                if len(parts) >= 2:
                    layer_name = parts[0].strip().split()[-1]
                    break

        if not layer_name:
            layer_name = "population"  # Default guess

        cmd = [
            "ogr2ogr",
            "-f", "GeoJSON",
            "-t_srs", "EPSG:4326",
            str(geojson_path),
            str(gpkg_path),
            layer_name
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode == 0:
            size_mb = geojson_path.stat().st_size / 1024 / 1024
            print(f"✅ Created {geojson_path.name} ({size_mb:.1f}MB)")
            return geojson_path
        else:
            print(f"❌ Conversion failed: {result.stderr}")
            return None

    except FileNotFoundError:
        print("⚠️  ogr2ogr not found. Keeping GPKG format.")
        return gpkg_path


def register_in_catalog(data_path: Path):
    """Register population dataset in the catalog."""

    catalog_path = DATA_DIR / "catalog.json"

    if catalog_path.exists():
        with open(catalog_path) as f:
            catalog = json.load(f)
    else:
        catalog = {}

    # Determine relative path
    rel_path = str(data_path.relative_to(DATA_DIR))

    catalog["kontur_population"] = {
        "source_file": rel_path,
        "source_type": data_path.suffix[1:],  # geojson or gpkg
        "description": "Population density grid for Panama at 400m H3 hexagon resolution. Based on GHSL, Facebook HRSL, and Microsoft Buildings data.",
        "tags": ["population", "density", "panama", "h3", "hexagon", "kontur", "demographics"],
        "data_type": "vector",
        "geometry_type": "polygon",
        "semantic_description": "Population count per 400m H3 hexagonal grid cell. Use for population density analysis, demographic studies, and urban/rural classification."
    }

    with open(catalog_path, 'w') as f:
        json.dump(catalog, f, indent=2)

    print(f"📝 Registered kontur_population in catalog")


if __name__ == "__main__":
    print("=" * 60)
    print("👥 Panama Population Data Ingestion (Kontur/HDX)")
    print("=" * 60)

    data_path = download_population_data()

    if data_path:
        # Convert to GeoJSON if GPKG
        if data_path.suffix == '.gpkg':
            geojson_path = convert_gpkg_to_geojson(data_path)
            if geojson_path and geojson_path.suffix == '.geojson':
                data_path = geojson_path

        register_in_catalog(data_path)
        print("\n🚀 Population data ready! Restart the backend to load.")
    else:
        print("\n❌ Ingestion failed")
        sys.exit(1)
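
Note: as in the Geofabrik script, the GPKG-to-GeoJSON step here depended on the ogr2ogr binary, plus an ogrinfo call just to discover the layer name. A rough geopandas/fiona equivalent, assuming those packages are installed and the GeoPackage carries a CRS (Kontur's does), could look like the sketch below; convert_gpkg_to_geojson_gpd is an illustrative name only:

import fiona
import geopandas as gpd
from pathlib import Path

def convert_gpkg_to_geojson_gpd(gpkg_path: Path) -> Path:
    """Read the first layer of a GeoPackage with geopandas and write WGS84 GeoJSON,
    replacing the ogrinfo/ogr2ogr subprocess calls in the removed script."""
    layer_name = fiona.listlayers(str(gpkg_path))[0]  # replaces the ogrinfo output parsing
    gdf = gpd.read_file(gpkg_path, layer=layer_name)
    geojson_path = gpkg_path.with_suffix(".geojson")
    gdf.to_crs("EPSG:4326").to_file(geojson_path, driver="GeoJSON")
    return geojson_path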
backend/scripts/download_overture.py
DELETED
@@ -1,133 +0,0 @@
"""
Panama Data Ingestion - Phase B: Overture Maps (Official SDK)

Uses the 'overturemaps' Python CLI/SDK to download data for Panama.
Themes: places, transportation, buildings.
"""

import subprocess
import os
import sys
import json
from pathlib import Path

# Panama Bounding Box
BBOX = "-83.05,7.20,-77.17,9.65"  # xmin, ymin, xmax, ymax

DATA_DIR = Path(__file__).parent.parent / "data"
OVERTURE_DIR = DATA_DIR / "overture"

def run_overture_download(theme_type: str, output_name: str):
    """
    Download a specific Overture theme type using the CLI.
    command: overturemaps download --bbox <bbox> -f geojson --type <type> -o <outfile>
    """
    print(f"\n🌍 Downloading Overture {theme_type}...")

    # Ensure output dir
    OVERTURE_DIR.mkdir(parents=True, exist_ok=True)

    output_file = OVERTURE_DIR / output_name

    # Try using the CLI via subprocess
    # Note: overturemaps downloads to a file buffer then writes.
    cmd = [
        "backend/venv/bin/overturemaps", "download",
        "--bbox", BBOX,
        "-f", "geojson",
        "--type", theme_type,
        "-o", str(output_file)
    ]

    try:
        print(f"   Running: {' '.join(cmd)}")
        subprocess.run(cmd, check=True)

        if output_file.exists():
            size_mb = output_file.stat().st_size / 1024 / 1024
            print(f"   ✅ Downloaded {output_name} ({size_mb:.1f}MB)")
            return True
        else:
            print("   ❌ Download produced no file")
            return False

    except subprocess.CalledProcessError as e:
        print(f"   ❌ Command failed: {e}")
        return False
    except Exception as e:
        print(f"   ❌ Error: {e}")
        return False

def register_in_catalog():
    catalog_path = DATA_DIR / "catalog.json"
    if catalog_path.exists():
        with open(catalog_path) as f:
            catalog = json.load(f)
    else:
        catalog = {}

    # Places
    if (OVERTURE_DIR / "overture_places.geojson").exists():
        catalog["overture_places"] = {
            "source_file": "overture/overture_places.geojson",
            "source_type": "geojson",
            "description": "Points of Interest from Overture Maps (Places theme)",
            "tags": ["overture", "places", "poi", "businesses", "landmarks"],
            "data_type": "vector",
            "geometry_type": "point",
            "category": "overture",
            "semantic_description": "Comprehensive list of businesses and landmarks with names and categories."
        }

    # Roads
    if (OVERTURE_DIR / "overture_roads.geojson").exists():
        catalog["overture_roads"] = {
            "source_file": "overture/overture_roads.geojson",
            "source_type": "geojson",
            "description": "Road network segments from Overture Maps",
            "tags": ["overture", "roads", "transportation", "infrastructure"],
            "data_type": "vector",
            "geometry_type": "linestring",
            "category": "overture"
        }

    # Buildings
    if (OVERTURE_DIR / "overture_buildings.geojson").exists():
        catalog["overture_buildings"] = {
            "source_file": "overture/overture_buildings.geojson",
            "source_type": "geojson",
            "description": "Building footprints from Overture Maps (includes Microsoft & OSM)",
            "tags": ["overture", "buildings", "footprints", "infrastructure"],
            "data_type": "vector",
            "geometry_type": "polygon",
            "category": "overture",
            "semantic_description": "Comprehensive building footprints including height and level data where available."
        }

    with open(catalog_path, 'w') as f:
        json.dump(catalog, f, indent=2)
    print("📝 Registered Overture datasets in catalog")

if __name__ == "__main__":
    print("="*60)
    print("🌐 Overture Maps Ingestion (via Official SDK)")
    print("="*60)

    # Themes to download
    # Type names: place, segment, building
    # Note: 'segment' is in the transportation theme; 'building' is in buildings.

    results = []
    results.append(run_overture_download("place", "overture_places.geojson"))
    results.append(run_overture_download("segment", "overture_roads.geojson"))

    # The buildings theme can be very large; Panama is small, but it still
    # contains many building polygons, so this download may take a while.
    results.append(run_overture_download("building", "overture_buildings.geojson"))

    if any(results):
        register_in_catalog()
        print("\n🚀 Phase B Ingestion Complete!")
    else:
        print("\n❌ All downloads failed.")
backend/scripts/download_stri_data.py
DELETED
@@ -1,79 +0,0 @@
#!/usr/bin/env python3
"""
Download Panama Protected Areas from STRI GIS Portal
Download Protected Areas shapefile and convert to GeoJSON
"""

import requests
import geopandas as gpd
from pathlib import Path
import logging
import zipfile
import io

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

DATA_DIR = Path(__file__).parent.parent / "data" / "stri"

# STRI GIS Data Portal URLs
STRI_DATASETS = {
    "protected_areas": {
        "url": "https://smithsoniangis.maps.arcgis.com/sharing/rest/content/items/7ee9c9c3f8874e7b8e8d39c7e5a1e3e8/data",
        "description": "Protected Areas of Panama 2022 Edition (SINAP + WDPA)"
    }
}

def download_stri_protected_areas():
    """Download STRI Protected Areas shapefile"""
    logger.info("Attempting to download STRI Protected Areas...")

    output_dir = DATA_DIR / "protected_areas"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Try alternative: use ArcGIS REST API to export to GeoJSON
    # This is the standard ESRI Feature Service export endpoint
    service_url = "https://services.arcgis.com/nzS0F0zdNLvs7nc8/arcgis/rest/services/ProtectedAreas_Panama_2022/FeatureServer/0/query"

    params = {
        "where": "1=1",            # Get all features
        "outFields": "*",          # All fields
        "f": "geojson",            # GeoJSON format
        "returnGeometry": "true"
    }

    try:
        logger.info("Querying STRI ArcGIS Feature Service...")
        response = requests.get(service_url, params=params, timeout=120)
        response.raise_for_status()

        # Save GeoJSON
        geojson_path = output_dir / "panama_protected_areas.geojson"
        with open(geojson_path, 'wb') as f:
            f.write(response.content)

        # Read to get count
        gdf = gpd.read_file(geojson_path)
        logger.info(f"Downloaded {len(gdf)} protected areas")

        return geojson_path, len(gdf)

    except Exception as e:
        logger.error(f"Failed to download from ArcGIS service: {e}")
        return None, 0

def main():
    logger.info("=== Downloading STRI Panama Protected Areas ===")

    path, count = download_stri_protected_areas()

    if path:
        logger.info(f"\n✅ Success: {count} protected areas downloaded")
        logger.info(f"   Path: {path}")
    else:
        logger.error(f"\n❌ Failed to download protected areas")

    return path, count

if __name__ == "__main__":
    main()
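
Note: ArcGIS feature services cap the number of records returned per query, so a single where=1=1 request can silently truncate larger layers. Panama's protected-areas layer is small, but a hedged sketch of offset-based paging (resultOffset/resultRecordCount are standard ArcGIS REST parameters, though support varies by service; fetch_all_features is an illustrative name) would be:

import geopandas as gpd
import pandas as pd
import requests

def fetch_all_features(query_url: str, page_size: int = 1000) -> gpd.GeoDataFrame:
    """Page through an ArcGIS FeatureServer /query endpoint with resultOffset,
    stopping when a page comes back short or empty."""
    frames, offset = [], 0
    while True:
        params = {
            "where": "1=1",
            "outFields": "*",
            "f": "geojson",
            "returnGeometry": "true",
            "resultOffset": offset,
            "resultRecordCount": page_size,
        }
        resp = requests.get(query_url, params=params, timeout=120)
        resp.raise_for_status()
        features = resp.json().get("features", [])
        if features:
            frames.append(gpd.GeoDataFrame.from_features(features, crs="EPSG:4326"))
        if len(features) < page_size:
            break
        offset += page_size
    if not frames:
        return gpd.GeoDataFrame()
    return gpd.GeoDataFrame(pd.concat(frames, ignore_index=True))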
backend/scripts/download_worldbank.py
DELETED
@@ -1,141 +0,0 @@
#!/usr/bin/env python3
"""
World Bank Data Downloader for Panama
Downloads socio-economic indicators from World Bank API v2
API Documentation: https://datahelpdesk.worldbank.org/knowledgebase/articles/889392-about-the-indicators-api-documentation
"""

import requests
import pandas as pd
from pathlib import Path
import logging
import time

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# World Bank API base URL
WB_API_BASE = "https://api.worldbank.org/v2"

# Key indicators for Panama (ISO3: PAN)
INDICATORS = {
    # Poverty & Inequality
    "SI.POV.NAHC": "Poverty headcount ratio at national poverty lines (% of population)",
    "SI.POV.DDAY": "Poverty headcount ratio at $2.15 a day (2017 PPP) (% of population)",
    "SI.POV.UMIC": "Poverty headcount ratio at $6.85 a day (2017 PPP) (% of population)",
    "SI.POV.GINI": "Gini index (World Bank estimate)",

    # Employment & Labor
    "SL.UEM.TOTL.ZS": "Unemployment, total (% of total labor force)",
    "SL.TLF.CACT.FE.ZS": "Labor force participation rate, female (% of female population ages 15+)",
    "SL.TLF.CACT.MA.ZS": "Labor force participation rate, male (% of male population ages 15+)",

    # GDP & Economy
    "NY.GDP.MKTP.CD": "GDP (current US$)",
    "NY.GDP.PCAP.CD": "GDP per capita (current US$)",
    "NY.GDP.MKTP.KD.ZG": "GDP growth (annual %)",

    # Health
    "SH.STA.MMRT": "Maternal mortality ratio (per 100,000 live births)",
    "SH.DYN.MORT": "Mortality rate, under-5 (per 1,000 live births)",
    "SH.XPD.CHEX.GD.ZS": "Current health expenditure (% of GDP)",

    # Education
    "SE.ADT.LITR.ZS": "Literacy rate, adult total (% of people ages 15 and above)",
    "SE.PRM.NENR": "School enrollment, primary (% net)",
    "SE.SEC.NENR": "School enrollment, secondary (% net)",
    "SE.XPD.TOTL.GD.ZS": "Government expenditure on education, total (% of GDP)"
}

DATA_DIR = Path(__file__).parent.parent / "data" / "worldbank"

def fetch_indicator(indicator_code: str, indicator_name: str) -> pd.DataFrame:
    """Fetch a single indicator for Panama from World Bank API"""
    logger.info(f"Fetching: {indicator_name}")

    url = f"{WB_API_BASE}/country/PAN/indicator/{indicator_code}"
    params = {
        "format": "json",
        "per_page": 100,
        "date": "2000:2024"  # Years 2000 through 2024
    }

    try:
        response = requests.get(url, params=params)
        response.raise_for_status()
        data = response.json()

        if len(data) < 2 or not data[1]:
            logger.warning(f"No data returned for {indicator_code}")
            return None

        # Convert to DataFrame
        records = []
        for entry in data[1]:
            if entry.get('value') is not None:
                records.append({
                    'year': int(entry['date']),
                    'value': float(entry['value']),
                    'indicator_code': indicator_code,
                    'indicator_name': indicator_name,
                    'country': entry['country']['value']
                })

        if not records:
            logger.warning(f"No valid values for {indicator_code}")
            return None

        df = pd.DataFrame(records)
        logger.info(f"  → Downloaded {len(df)} years of data")
        return df

    except Exception as e:
        logger.error(f"Failed to fetch {indicator_code}: {e}")
        return None

def download_all_indicators():
    """Download all indicators and save to CSV"""
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    all_data = []

    for code, name in INDICATORS.items():
        df = fetch_indicator(code, name)
        if df is not None:
            all_data.append(df)
        time.sleep(0.5)  # Rate limiting

    if not all_data:
        logger.error("No data downloaded!")
        return

    # Combine all indicators
    combined_df = pd.concat(all_data, ignore_index=True)

    # Save as CSV
    output_file = DATA_DIR / "panama_indicators.csv"
    combined_df.to_csv(output_file, index=False)
    logger.info(f"Saved {len(combined_df)} records to {output_file}")

    # Create pivot table for easy viewing
    pivot_df = combined_df.pivot_table(
        index='year',
        columns='indicator_name',
        values='value'
    )

    pivot_file = DATA_DIR / "panama_indicators_pivot.csv"
    pivot_df.to_csv(pivot_file)
    logger.info(f"Saved pivot table to {pivot_file}")

    return combined_df

def main():
    logger.info("Starting World Bank data download for Panama...")
    download_all_indicators()
    logger.info("Download complete!")

if __name__ == "__main__":
    main()