GerardCB committed on
Commit c6a9e71 · 1 Parent(s): 4851501

Cleanup: Remove unused download scripts

backend/scripts/download_geofabrik.py DELETED
@@ -1,192 +0,0 @@
- """
- Panama Data Ingestion - Phase A: OpenStreetMap via Geofabrik
-
- Downloads pre-packaged OSM data for Panama as shapefiles and converts to GeoJSON.
- Data source: https://download.geofabrik.de/central-america.html
- """
-
- import os
- import sys
- import zipfile
- import requests
- import subprocess
- from pathlib import Path
-
- # Panama Geofabrik URL
- GEOFABRIK_URL = "https://download.geofabrik.de/central-america/panama-latest-free.shp.zip"
-
- # Output directories
- DATA_DIR = Path(__file__).parent.parent / "data"
- OSM_DIR = DATA_DIR / "osm"
- TEMP_DIR = DATA_DIR / "temp"
-
- # OSM layers to extract
- OSM_LAYERS = [
-     ("gis_osm_roads_free_1", "roads", "Road network with classification"),
-     ("gis_osm_pois_free_1", "pois", "Points of interest (restaurants, shops, etc.)"),
-     ("gis_osm_pois_a_free_1", "pois_areas", "POI areas (larger venues)"),
-     ("gis_osm_buildings_a_free_1", "buildings", "Building footprints"),
-     ("gis_osm_landuse_a_free_1", "landuse", "Land use zones (residential, commercial, etc.)"),
-     ("gis_osm_natural_free_1", "natural_points", "Natural features (trees, peaks)"),
-     ("gis_osm_natural_a_free_1", "natural_areas", "Natural areas (forests, parks)"),
-     ("gis_osm_water_a_free_1", "water_areas", "Water bodies (lakes, reservoirs)"),
-     ("gis_osm_waterways_free_1", "waterways", "Rivers and streams"),
-     ("gis_osm_railways_free_1", "railways", "Railway lines"),
-     ("gis_osm_traffic_free_1", "traffic", "Traffic infrastructure (signals, crossings)"),
-     ("gis_osm_traffic_a_free_1", "traffic_areas", "Traffic areas (parking lots)"),
-     ("gis_osm_transport_free_1", "transport", "Transport points (bus stops, stations)"),
-     ("gis_osm_transport_a_free_1", "transport_areas", "Transport areas (airports, ports)"),
-     ("gis_osm_places_free_1", "places", "Place names (cities, towns, villages)"),
-     ("gis_osm_places_a_free_1", "places_areas", "Place areas"),
-     ("gis_osm_pofw_free_1", "places_of_worship", "Places of worship"),
-     ("gis_osm_pofw_a_free_1", "places_of_worship_areas", "Places of worship (buildings)"),
- ]
-
-
- def download_file(url: str, dest: Path) -> bool:
-     """Download a file with progress indication."""
-     print(f"📥 Downloading {url}...")
-
-     try:
-         response = requests.get(url, stream=True)
-         response.raise_for_status()
-
-         total_size = int(response.headers.get('content-length', 0))
-         downloaded = 0
-
-         with open(dest, 'wb') as f:
-             for chunk in response.iter_content(chunk_size=8192):
-                 f.write(chunk)
-                 downloaded += len(chunk)
-                 if total_size > 0:
-                     pct = (downloaded / total_size) * 100
-                     print(f"\r Progress: {pct:.1f}% ({downloaded // 1024 // 1024}MB)", end="")
-
-         print(f"\n✅ Downloaded to {dest}")
-         return True
-
-     except Exception as e:
-         print(f"❌ Download failed: {e}")
-         return False
-
-
- def convert_shp_to_geojson(shp_path: Path, geojson_path: Path) -> bool:
-     """Convert shapefile to GeoJSON using ogr2ogr."""
-     try:
-         cmd = [
-             "ogr2ogr",
-             "-f", "GeoJSON",
-             "-t_srs", "EPSG:4326",  # Ensure WGS84
-             str(geojson_path),
-             str(shp_path)
-         ]
-         result = subprocess.run(cmd, capture_output=True, text=True)
-
-         if result.returncode == 0:
-             return True
-         else:
-             print(f" ogr2ogr error: {result.stderr}")
-             return False
-
-     except FileNotFoundError:
-         print("⚠️ ogr2ogr not found. Please install GDAL:")
-         print(" brew install gdal # macOS")
-         print(" apt install gdal-bin # Ubuntu")
-         return False
-
-
- def extract_and_convert():
-     """Extract shapefiles from zip and convert to GeoJSON."""
-
-     # Ensure directories exist
-     OSM_DIR.mkdir(parents=True, exist_ok=True)
-     TEMP_DIR.mkdir(parents=True, exist_ok=True)
-
-     zip_path = TEMP_DIR / "panama-osm.zip"
-
-     # Download if not exists
-     if not zip_path.exists():
-         if not download_file(GEOFABRIK_URL, zip_path):
-             return False
-     else:
-         print(f"📦 Using cached {zip_path}")
-
-     # Extract
-     print(f"📂 Extracting to {TEMP_DIR}...")
-     with zipfile.ZipFile(zip_path, 'r') as zf:
-         zf.extractall(TEMP_DIR)
-
-     # Convert each layer
-     converted = 0
-     for shp_name, output_name, description in OSM_LAYERS:
-         shp_path = TEMP_DIR / f"{shp_name}.shp"
-         geojson_path = OSM_DIR / f"{output_name}.geojson"
-
-         if not shp_path.exists():
-             print(f"⏭️ Skipping {shp_name} (not in download)")
-             continue
-
-         print(f"🔄 Converting {shp_name} → {output_name}.geojson...")
-
-         if convert_shp_to_geojson(shp_path, geojson_path):
-             # Get file size
-             size_mb = geojson_path.stat().st_size / 1024 / 1024
-             print(f" ✅ Created {geojson_path.name} ({size_mb:.1f}MB)")
-             converted += 1
-         else:
-             print(f" ❌ Failed to convert {shp_name}")
-
-     print(f"\n🎉 Converted {converted}/{len(OSM_LAYERS)} OSM layers")
-     return converted > 0
-
-
- def register_in_catalog():
-     """Register OSM datasets in the catalog."""
-     import json
-
-     catalog_path = DATA_DIR / "catalog.json"
-
-     if catalog_path.exists():
-         with open(catalog_path) as f:
-             catalog = json.load(f)
-     else:
-         catalog = {}
-
-     for shp_name, output_name, description in OSM_LAYERS:
-         geojson_path = OSM_DIR / f"{output_name}.geojson"
-
-         if not geojson_path.exists():
-             continue
-
-         # Create catalog entry
-         table_name = f"osm_{output_name}"
-         rel_path = f"osm/{output_name}.geojson"
-
-         catalog[table_name] = {
-             "source_file": rel_path,
-             "source_type": "geojson",
-             "description": f"OpenStreetMap {description} for Panama",
-             "tags": ["osm", "panama", output_name.replace("_", " ")],
-             "data_type": "vector",
-             "geometry_type": "auto"  # Will be detected on load
-         }
-
-         print(f"📝 Registered {table_name}")
-
-     with open(catalog_path, 'w') as f:
-         json.dump(catalog, f, indent=2)
-
-     print(f"✅ Updated catalog with OSM datasets")
-
-
- if __name__ == "__main__":
-     print("=" * 60)
-     print("🗺️ Panama OSM Data Ingestion (Geofabrik)")
-     print("=" * 60)
-
-     if extract_and_convert():
-         register_in_catalog()
-         print("\n🚀 OSM data ready! Restart the backend to load new datasets.")
-     else:
-         print("\n❌ Ingestion failed")
-         sys.exit(1)
backend/scripts/download_global_datasets.py DELETED
@@ -1,133 +0,0 @@
- #!/usr/bin/env python3
- """
- Download global geo-referenced datasets for Panama
- - OurAirports: Global airport database
- - WRI Global Power Plant Database
- - Other infrastructure datasets
- """
-
- import requests
- import pandas as pd
- import geopandas as gpd
- from pathlib import Path
- import logging
-
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- DATA_DIR = Path(__file__).parent.parent / "data" / "global"
-
- # Dataset URLs
- DATASETS = {
-     "airports": {
-         "url": "https://davidmegginson.github.io/ourairports-data/airports.csv",
-         "description": "OurAirports - Global airport database"
-     },
-     "power_plants": {
-         "url": "https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3/global_power_plant_database.csv",
-         "description": "WRI Global Power Plant Database v1.3"
-     }
- }
-
- def download_airports():
-     """Download and process OurAirports data for Panama"""
-     logger.info("Downloading OurAirports global database...")
-
-     url = DATASETS["airports"]["url"]
-     response = requests.get(url)
-     response.raise_for_status()
-
-     # Save raw CSV
-     output_dir = DATA_DIR / "airports"
-     output_dir.mkdir(parents=True, exist_ok=True)
-
-     csv_path = output_dir / "airports_global.csv"
-     with open(csv_path, 'wb') as f:
-         f.write(response.content)
-
-     logger.info(f"Saved raw airports data: {csv_path}")
-
-     # Filter for Panama (iso_country = PA)
-     df = pd.read_csv(csv_path)
-     panama_df = df[df['iso_country'] == 'PA'].copy()
-
-     logger.info(f"Found {len(panama_df)} airports in Panama")
-
-     # Convert to GeoDataFrame
-     gdf = gpd.GeoDataFrame(
-         panama_df,
-         geometry=gpd.points_from_xy(panama_df.longitude_deg, panama_df.latitude_deg),
-         crs="EPSG:4326"
-     )
-
-     # Save as GeoJSON
-     geojson_path = output_dir / "panama_airports.geojson"
-     gdf.to_file(geojson_path, driver='GeoJSON')
-
-     logger.info(f"Created GeoJSON: {geojson_path}")
-     return geojson_path, len(gdf)
-
- def download_power_plants():
-     """Download and process WRI Global Power Plant Database for Panama"""
-     logger.info("Downloading WRI Global Power Plant Database...")
-
-     url = DATASETS["power_plants"]["url"]
-     response = requests.get(url)
-     response.raise_for_status()
-
-     # Save raw CSV
-     output_dir = DATA_DIR / "power_plants"
-     output_dir.mkdir(parents=True, exist_ok=True)
-
-     csv_path = output_dir / "power_plants_global.csv"
-     with open(csv_path, 'wb') as f:
-         f.write(response.content)
-
-     logger.info(f"Saved raw power plants data: {csv_path}")
-
-     # Filter for Panama (country = PAN)
-     df = pd.read_csv(csv_path)
-     panama_df = df[df['country'] == 'PAN'].copy()
-
-     logger.info(f"Found {len(panama_df)} power plants in Panama")
-
-     # Convert to GeoDataFrame
-     gdf = gpd.GeoDataFrame(
-         panama_df,
-         geometry=gpd.points_from_xy(panama_df.longitude, panama_df.latitude),
-         crs="EPSG:4326"
-     )
-
-     # Save as GeoJSON
-     geojson_path = output_dir / "panama_power_plants.geojson"
-     gdf.to_file(geojson_path, driver='GeoJSON')
-
-     logger.info(f"Created GeoJSON: {geojson_path}")
-     return geojson_path, len(gdf)
-
- def main():
-     logger.info("=== Global Dataset Download Starting ===")
-
-     results = []
-
-     try:
-         airports_path, airports_count = download_airports()
-         results.append({"dataset": "airports", "count": airports_count, "path": airports_path})
-     except Exception as e:
-         logger.error(f"Failed to download airports: {e}")
-
-     try:
-         power_path, power_count = download_power_plants()
-         results.append({"dataset": "power_plants", "count": power_count, "path": power_path})
-     except Exception as e:
-         logger.error(f"Failed to download power plants: {e}")
-
-     logger.info("\n=== Download Summary ===")
-     for result in results:
-         logger.info(f" {result['dataset']}: {result['count']} features")
-
-     logger.info("\n=== Complete ===")
-     return results
-
- if __name__ == "__main__":
-     main()
backend/scripts/download_hdx.py DELETED
@@ -1,72 +0,0 @@
- #!/usr/bin/env python3
- """
- HDX Data Downloader for Panama
- Downloads official datasets from Humanitarian Data Exchange
- """
-
- import requests
- from pathlib import Path
- import logging
-
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- # HDX Dataset URLs (from research)
- HDX_DATASETS = {
-     "health": {
-         "name": "Panama - Health Indicators",
-         "url": "https://data.humdata.org/dataset/4d3f9ab7-8e5c-4a24-ae5d-cfc3e81b4db6",
-         "description": "WHO health indicators for Panama"
-     },
-     "education": {
-         "name": "Panama - Education",
-         "url": "https://data.humdata.org/dataset/panama-education-statistics",
-         "description": "UNESCO/World Bank education statistics"
-     },
-     "economy": {
-         "name": "Panama - Economy and Growth",
-         "url": "https://data.humdata.org/dataset/panama-economy-indicators",
-         "description": "World Bank economic indicators"
-     }
- }
-
- DATA_DIR = Path(__file__).parent.parent / "data" / "hdx"
-
- def download_hdx_dataset(dataset_key: str):
-     """Download a dataset from HDX"""
-     dataset = HDX_DATASETS[dataset_key]
-     logger.info(f"Downloading {dataset['name']}...")
-
-     # Create output directory
-     output_dir = DATA_DIR / dataset_key
-     output_dir.mkdir(parents=True, exist_ok=True)
-
-     try:
-         # HDX datasets typically have resource download URLs
-         # We'll need to parse the dataset page to get the actual download link
-         response = requests.get(dataset['url'])
-         response.raise_for_status()
-
-         # Note: This is a placeholder - actual implementation would need to:
-         # 1. Parse the HDX page HTML to find CSV/Excel download links
-         # 2. Download each resource file
-         # 3. Save to output_dir
-
-         logger.info(f"Downloaded to {output_dir}")
-         return output_dir
-
-     except Exception as e:
-         logger.error(f"Failed to download {dataset['name']}: {e}")
-         return None
-
- def main():
-     """Download all HDX datasets"""
-     logger.info("Starting HDX data download...")
-
-     for key in HDX_DATASETS.keys():
-         download_hdx_dataset(key)
-
-     logger.info("Download complete!")
-
- if __name__ == "__main__":
-     main()
backend/scripts/download_kontur.py DELETED
@@ -1,239 +0,0 @@
- """
- Panama Data Ingestion - Phase A: Kontur Population
-
- Downloads population density data from HDX (Humanitarian Data Exchange).
- Data source: https://data.humdata.org/dataset/kontur-population-panama
- """
-
- import os
- import sys
- import json
- import requests
- import gzip
- import shutil
- from pathlib import Path
-
- # HDX API for Kontur Population Panama
- HDX_DATASET_URL = "https://data.humdata.org/api/3/action/package_show?id=kontur-population-panama"
-
- # Output directories
- DATA_DIR = Path(__file__).parent.parent / "data"
- KONTUR_DIR = DATA_DIR / "kontur"
- TEMP_DIR = DATA_DIR / "temp"
-
-
- def get_download_url() -> str:
-     """Fetch the actual download URL from HDX API."""
-     print("🔍 Fetching download URL from HDX...")
-
-     try:
-         response = requests.get(HDX_DATASET_URL)
-         response.raise_for_status()
-         data = response.json()
-
-         if not data.get("success"):
-             print("❌ HDX API returned error")
-             return None
-
-         resources = data.get("result", {}).get("resources", [])
-
-         # Look for GeoJSON or GPKG file
-         for resource in resources:
-             name = resource.get("name", "").lower()
-             url = resource.get("url", "")
-
-             if "geojson" in name or "gpkg" in name:
-                 print(f" Found: {resource.get('name')}")
-                 return url
-
-         # Fallback to first resource
-         if resources:
-             return resources[0].get("url")
-
-         return None
-
-     except Exception as e:
-         print(f"❌ Failed to fetch HDX metadata: {e}")
-         return None
-
-
- def download_file(url: str, dest: Path) -> bool:
-     """Download a file with progress indication."""
-     print(f"📥 Downloading from {url[:80]}...")
-
-     try:
-         response = requests.get(url, stream=True)
-         response.raise_for_status()
-
-         total_size = int(response.headers.get('content-length', 0))
-         downloaded = 0
-
-         with open(dest, 'wb') as f:
-             for chunk in response.iter_content(chunk_size=8192):
-                 f.write(chunk)
-                 downloaded += len(chunk)
-                 if total_size > 0:
-                     pct = (downloaded / total_size) * 100
-                     print(f"\r Progress: {pct:.1f}% ({downloaded // 1024}KB)", end="")
-
-         print(f"\n✅ Downloaded to {dest}")
-         return True
-
-     except Exception as e:
-         print(f"❌ Download failed: {e}")
-         return False
-
-
- def decompress_if_needed(file_path: Path) -> Path:
-     """Decompress .gz file if needed."""
-     if file_path.suffix == '.gz':
-         output_path = file_path.with_suffix('')
-         print(f"📦 Decompressing {file_path.name}...")
-
-         with gzip.open(file_path, 'rb') as f_in:
-             with open(output_path, 'wb') as f_out:
-                 shutil.copyfileobj(f_in, f_out)
-
-         return output_path
-
-     return file_path
-
-
- def download_population_data():
-     """Download Kontur Population data for Panama."""
-
-     # Ensure directories exist
-     KONTUR_DIR.mkdir(parents=True, exist_ok=True)
-     TEMP_DIR.mkdir(parents=True, exist_ok=True)
-
-     # Get download URL
-     download_url = get_download_url()
-
-     if not download_url:
-         # Fallback to known URL pattern
-         download_url = "https://geodata-eu-central-1-kontur-public.s3.amazonaws.com/kontur_datasets/kontur_population_PA_20231101.gpkg.gz"
-         print(f"⚠️ Using fallback URL: {download_url}")
-
-     # Determine filename
-     filename = download_url.split("/")[-1]
-     temp_path = TEMP_DIR / filename
-
-     # Download
-     if not temp_path.exists():
-         if not download_file(download_url, temp_path):
-             return None
-     else:
-         print(f"📦 Using cached {temp_path}")
-
-     # Decompress if needed
-     data_path = decompress_if_needed(temp_path)
-
-     # Move to final location
-     final_path = KONTUR_DIR / data_path.name
-     if data_path != final_path:
-         shutil.move(str(data_path), str(final_path))
-
-     print(f"✅ Population data ready at {final_path}")
-     return final_path
-
-
- def convert_gpkg_to_geojson(gpkg_path: Path) -> Path:
-     """Convert GeoPackage to GeoJSON using ogr2ogr."""
-     import subprocess
-
-     geojson_path = gpkg_path.with_suffix('.geojson')
-
-     print(f"🔄 Converting to GeoJSON...")
-
-     try:
-         # First, list layers in the GPKG
-         result = subprocess.run(
-             ["ogrinfo", "-so", str(gpkg_path)],
-             capture_output=True, text=True
-         )
-
-         # Get the first layer name
-         layer_name = None
-         for line in result.stdout.split('\n'):
-             if ': ' in line and 'using driver' not in line.lower():
-                 parts = line.split(':')
-                 if len(parts) >= 2:
-                     layer_name = parts[0].strip().split()[-1]
-                     break
-
-         if not layer_name:
-             layer_name = "population"  # Default guess
-
-         cmd = [
-             "ogr2ogr",
-             "-f", "GeoJSON",
-             "-t_srs", "EPSG:4326",
-             str(geojson_path),
-             str(gpkg_path),
-             layer_name
-         ]
-
-         result = subprocess.run(cmd, capture_output=True, text=True)
-
-         if result.returncode == 0:
-             size_mb = geojson_path.stat().st_size / 1024 / 1024
-             print(f"✅ Created {geojson_path.name} ({size_mb:.1f}MB)")
-             return geojson_path
-         else:
-             print(f"❌ Conversion failed: {result.stderr}")
-             return None
-
-     except FileNotFoundError:
-         print("⚠️ ogr2ogr not found. Keeping GPKG format.")
-         return gpkg_path
-
-
- def register_in_catalog(data_path: Path):
-     """Register population dataset in the catalog."""
-
-     catalog_path = DATA_DIR / "catalog.json"
-
-     if catalog_path.exists():
-         with open(catalog_path) as f:
-             catalog = json.load(f)
-     else:
-         catalog = {}
-
-     # Determine relative path
-     rel_path = str(data_path.relative_to(DATA_DIR))
-
-     catalog["kontur_population"] = {
-         "source_file": rel_path,
-         "source_type": data_path.suffix[1:],  # geojson or gpkg
-         "description": "Population density grid for Panama at 400m H3 hexagon resolution. Based on GHSL, Facebook HRSL, and Microsoft Buildings data.",
-         "tags": ["population", "density", "panama", "h3", "hexagon", "kontur", "demographics"],
-         "data_type": "vector",
-         "geometry_type": "polygon",
-         "semantic_description": "Population count per 400m H3 hexagonal grid cell. Use for population density analysis, demographic studies, and urban/rural classification."
-     }
-
-     with open(catalog_path, 'w') as f:
-         json.dump(catalog, f, indent=2)
-
-     print(f"📝 Registered kontur_population in catalog")
-
-
- if __name__ == "__main__":
-     print("=" * 60)
-     print("👥 Panama Population Data Ingestion (Kontur/HDX)")
-     print("=" * 60)
-
-     data_path = download_population_data()
-
-     if data_path:
-         # Convert to GeoJSON if GPKG
-         if data_path.suffix == '.gpkg':
-             geojson_path = convert_gpkg_to_geojson(data_path)
-             if geojson_path and geojson_path.suffix == '.geojson':
-                 data_path = geojson_path
-
-         register_in_catalog(data_path)
-         print("\n🚀 Population data ready! Restart the backend to load.")
-     else:
-         print("\n❌ Ingestion failed")
-         sys.exit(1)
backend/scripts/download_overture.py DELETED
@@ -1,133 +0,0 @@
- """
- Panama Data Ingestion - Phase B: Overture Maps (Official SDK)
-
- Uses the 'overturemaps' Python CLI/SDK to download data for Panama.
- Themes: places, transportation, buildings.
- """
-
- import subprocess
- import os
- import sys
- import json
- from pathlib import Path
-
- # Panama Bounding Box
- BBOX = "-83.05,7.20,-77.17,9.65"  # xmin, ymin, xmax, ymax
-
- DATA_DIR = Path(__file__).parent.parent / "data"
- OVERTURE_DIR = DATA_DIR / "overture"
-
- def run_overture_download(theme_type: str, output_name: str):
-     """
-     Download a specific Overture theme type using the CLI.
-     command: overturemaps download --bbox <bbox> -f geojson --type <type> -o <outfile>
-     """
-     print(f"\n🌍 Downloading Overture {theme_type}...")
-
-     # Ensure output dir
-     OVERTURE_DIR.mkdir(parents=True, exist_ok=True)
-
-     output_file = OVERTURE_DIR / output_name
-
-     # Try using the CLI via subprocess
-     # Note: overturemaps downloads to a file buffer then writes.
-     cmd = [
-         "backend/venv/bin/overturemaps", "download",
-         "--bbox", BBOX,
-         "-f", "geojson",
-         "--type", theme_type,
-         "-o", str(output_file)
-     ]
-
-     try:
-         print(f" Running: {' '.join(cmd)}")
-         subprocess.run(cmd, check=True)
-
-         if output_file.exists():
-             size_mb = output_file.stat().st_size / 1024 / 1024
-             print(f" ✅ Downloaded {output_name} ({size_mb:.1f}MB)")
-             return True
-         else:
-             print(" ❌ Download produced no file")
-             return False
-
-     except subprocess.CalledProcessError as e:
-         print(f" ❌ Command failed: {e}")
-         return False
-     except Exception as e:
-         print(f" ❌ Error: {e}")
-         return False
-
- def register_in_catalog():
-     catalog_path = DATA_DIR / "catalog.json"
-     if catalog_path.exists():
-         with open(catalog_path) as f:
-             catalog = json.load(f)
-     else:
-         catalog = {}
-
-     # Places
-     if (OVERTURE_DIR / "overture_places.geojson").exists():
-         catalog["overture_places"] = {
-             "source_file": "overture/overture_places.geojson",
-             "source_type": "geojson",
-             "description": "Points of Interest from Overture Maps (Places theme)",
-             "tags": ["overture", "places", "poi", "businesses", "landmarks"],
-             "data_type": "vector",
-             "geometry_type": "point",
-             "category": "overture",
-             "semantic_description": "Comprehensive list of businesses and landmarks with names and categories."
-         }
-
-     # Roads
-     if (OVERTURE_DIR / "overture_roads.geojson").exists():
-         catalog["overture_roads"] = {
-             "source_file": "overture/overture_roads.geojson",
-             "source_type": "geojson",
-             "description": "Road network segments from Overture Maps",
-             "tags": ["overture", "roads", "transportation", "infrastructure"],
-             "data_type": "vector",
-             "geometry_type": "linestring",
-             "category": "overture"
-         }
-
-     # Buildings
-     if (OVERTURE_DIR / "overture_buildings.geojson").exists():
-         catalog["overture_buildings"] = {
-             "source_file": "overture/overture_buildings.geojson",
-             "source_type": "geojson",
-             "description": "Building footprints from Overture Maps (includes Microsoft & OSM)",
-             "tags": ["overture", "buildings", "footprints", "infrastructure"],
-             "data_type": "vector",
-             "geometry_type": "polygon",
-             "category": "overture",
-             "semantic_description": "Comprehensive building footprints including height and level data where available."
-         }
-
-     with open(catalog_path, 'w') as f:
-         json.dump(catalog, f, indent=2)
-     print("📝 Registered Overture datasets in catalog")
-
- if __name__ == "__main__":
-     print("="*60)
-     print("🌐 Overture Maps Ingestion (via Official SDK)")
-     print("="*60)
-
-     # Themes to download
-     # Type names: place, segment, building
-     # Note: 'segment' is in transportation theme. 'building' in buildings.
-
-     results = []
-     results.append(run_overture_download("place", "overture_places.geojson"))
-     results.append(run_overture_download("segment", "overture_roads.geojson"))
-
-     # Buildings might be HUGE.
-     # Panama isn't that big but buildings has many polygons.
-     # Let's try it.
-     results.append(run_overture_download("building", "overture_buildings.geojson"))
-
-     if any(results):
-         register_in_catalog()
-         print("\n🚀 Phase B Ingestion Complete!")
-     else:
-         print("\n❌ All downloads failed.")
backend/scripts/download_stri_data.py DELETED
@@ -1,79 +0,0 @@
- #!/usr/bin/env python3
- """
- Download Panama Protected Areas from STRI GIS Portal
- Download Protected Areas shapefile and convert to GeoJSON
- """
-
- import requests
- import geopandas as gpd
- from pathlib import Path
- import logging
- import zipfile
- import io
-
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- DATA_DIR = Path(__file__).parent.parent / "data" / "stri"
-
- # STRI GIS Data Portal URLs
- STRI_DATASETS = {
-     "protected_areas": {
-         "url": "https://smithsoniangis.maps.arcgis.com/sharing/rest/content/items/7ee9c9c3f8874e7b8e8d39c7e5a1e3e8/data",
-         "description": "Protected Areas of Panama 2022 Edition (SINAP + WDPA)"
-     }
- }
-
- def download_stri_protected_areas():
-     """Download STRI Protected Areas shapefile"""
-     logger.info("Attempting to download STRI Protected Areas...")
-
-     output_dir = DATA_DIR / "protected_areas"
-     output_dir.mkdir(parents=True, exist_ok=True)
-
-     # Try alternative: use ArcGIS REST API to export to GeoJSON
-     # This is the standard ESRI Feature Service export endpoint
-     service_url = "https://services.arcgis.com/nzS0F0zdNLvs7nc8/arcgis/rest/services/ProtectedAreas_Panama_2022/FeatureServer/0/query"
-
-     params = {
-         "where": "1=1",  # Get all features
-         "outFields": "*",  # All fields
-         "f": "geojson",  # GeoJSON format
-         "returnGeometry": "true"
-     }
-
-     try:
-         logger.info("Querying STRI ArcGIS Feature Service...")
-         response = requests.get(service_url, params=params, timeout=120)
-         response.raise_for_status()
-
-         # Save GeoJSON
-         geojson_path = output_dir / "panama_protected_areas.geojson"
-         with open(geojson_path, 'wb') as f:
-             f.write(response.content)
-
-         # Read to get count
-         gdf = gpd.read_file(geojson_path)
-         logger.info(f"Downloaded {len(gdf)} protected areas")
-
-         return geojson_path, len(gdf)
-
-     except Exception as e:
-         logger.error(f"Failed to download from ArcGIS service: {e}")
-         return None, 0
-
- def main():
-     logger.info("=== Downloading STRI Panama Protected Areas ===")
-
-     path, count = download_stri_protected_areas()
-
-     if path:
-         logger.info(f"\n✅ Success: {count} protected areas downloaded")
-         logger.info(f" Path: {path}")
-     else:
-         logger.error("\n❌ Failed to download protected areas")
-
-     return path, count
-
- if __name__ == "__main__":
-     main()
backend/scripts/download_worldbank.py DELETED
@@ -1,141 +0,0 @@
- #!/usr/bin/env python3
- """
- World Bank Data Downloader for Panama
- Downloads socio-economic indicators from World Bank API v2
- API Documentation: https://datahelpdesk.worldbank.org/knowledgebase/articles/889392-about-the-indicators-api-documentation
- """
-
- import requests
- import pandas as pd
- from pathlib import Path
- import logging
- import time
-
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- # World Bank API base URL
- WB_API_BASE = "https://api.worldbank.org/v2"
-
- # Key indicators for Panama (ISO3: PAN)
- INDICATORS = {
-     # Poverty & Inequality
-     "SI.POV.NAHC": "Poverty headcount ratio at national poverty lines (% of population)",
-     "SI.POV.DDAY": "Poverty headcount ratio at $2.15 a day (2017 PPP) (% of population)",
-     "SI.POV.UMIC": "Poverty headcount ratio at $6.85 a day (2017 PPP) (% of population)",
-     "SI.POV.GINI": "Gini index (World Bank estimate)",
-
-     # Employment & Labor
-     "SL.UEM.TOTL.ZS": "Unemployment, total (% of total labor force)",
-     "SL.TLF.CACT.FE.ZS": "Labor force participation rate, female (% of female population ages 15+)",
-     "SL.TLF.CACT.MA.ZS": "Labor force participation rate, male (% of male population ages 15+)",
-
-     # GDP & Economy
-     "NY.GDP.MKTP.CD": "GDP (current US$)",
-     "NY.GDP.PCAP.CD": "GDP per capita (current US$)",
-     "NY.GDP.MKTP.KD.ZG": "GDP growth (annual %)",
-
-     # Health
-     "SH.STA.MMRT": "Maternal mortality ratio (per 100,000 live births)",
-     "SH.DYN.MORT": "Mortality rate, under-5 (per 1,000 live births)",
-     "SH.XPD.CHEX.GD.ZS": "Current health expenditure (% of GDP)",
-
-     # Education
-     "SE.ADT.LITR.ZS": "Literacy rate, adult total (% of people ages 15 and above)",
-     "SE.PRM.NENR": "School enrollment, primary (% net)",
-     "SE.SEC.NENR": "School enrollment, secondary (% net)",
-     "SE.XPD.TOTL.GD.ZS": "Government expenditure on education, total (% of GDP)"
- }
-
- DATA_DIR = Path(__file__).parent.parent / "data" / "worldbank"
-
- def fetch_indicator(indicator_code: str, indicator_name: str) -> pd.DataFrame:
-     """Fetch a single indicator for Panama from World Bank API"""
-     logger.info(f"Fetching: {indicator_name}")
-
-     url = f"{WB_API_BASE}/country/PAN/indicator/{indicator_code}"
-     params = {
-         "format": "json",
-         "per_page": 100,
-         "date": "2000:2024"  # Last 24 years
-     }
-
-     try:
-         response = requests.get(url, params=params)
-         response.raise_for_status()
-         data = response.json()
-
-         if len(data) < 2 or not data[1]:
-             logger.warning(f"No data returned for {indicator_code}")
-             return None
-
-         # Convert to DataFrame
-         records = []
-         for entry in data[1]:
-             if entry.get('value') is not None:
-                 records.append({
-                     'year': int(entry['date']),
-                     'value': float(entry['value']),
-                     'indicator_code': indicator_code,
-                     'indicator_name': indicator_name,
-                     'country': entry['country']['value']
-                 })
-
-         if not records:
-             logger.warning(f"No valid values for {indicator_code}")
-             return None
-
-         df = pd.DataFrame(records)
-         logger.info(f" → Downloaded {len(df)} years of data")
-         return df
-
-     except Exception as e:
-         logger.error(f"Failed to fetch {indicator_code}: {e}")
-         return None
-
- def download_all_indicators():
-     """Download all indicators and save to CSV"""
-     DATA_DIR.mkdir(parents=True, exist_ok=True)
-
-     all_data = []
-
-     for code, name in INDICATORS.items():
-         df = fetch_indicator(code, name)
-         if df is not None:
-             all_data.append(df)
-         time.sleep(0.5)  # Rate limiting
-
-     if not all_data:
-         logger.error("No data downloaded!")
-         return
-
-     # Combine all indicators
-     combined_df = pd.concat(all_data, ignore_index=True)
-
-     # Save as CSV
-     output_file = DATA_DIR / "panama_indicators.csv"
-     combined_df.to_csv(output_file, index=False)
-     logger.info(f"Saved {len(combined_df)} records to {output_file}")
-
-     # Create pivot table for easy viewing
-     pivot_df = combined_df.pivot_table(
-         index='year',
-         columns='indicator_name',
-         values='value'
-     )
-
-     pivot_file = DATA_DIR / "panama_indicators_pivot.csv"
-     pivot_df.to_csv(pivot_file)
-     logger.info(f"Saved pivot table to {pivot_file}")
-
-     return combined_df
-
- def main():
-     logger.info("Starting World Bank data download for Panama...")
-     download_all_indicators()
-     logger.info("Download complete!")
-
- if __name__ == "__main__":
-     main()