#!/usr/bin/env python3 """ Process World Bank indicators and create GeoJSON layers Joins most recent indicator data to Panama administrative boundaries """ import pandas as pd import geopandas as gpd from pathlib import Path import logging import json logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) DATA_DIR = Path(__file__).parent.parent / "data" WB_DIR = DATA_DIR / "worldbank" BASE_DIR = DATA_DIR / "base" OUTPUT_DIR = DATA_DIR / "socioeconomic" def load_admin_boundaries(): """Load Panama administrative boundaries as GeoDataFrame""" admin1_path = BASE_DIR / "pan_admin1.geojson" if not admin1_path.exists(): logger.error(f"Admin boundaries not found: {admin1_path}") return None gdf = gpd.read_file(admin1_path) logger.info(f"Loaded {len(gdf)} provinces") return gdf def process_indicators(): """Load and process World Bank indicators""" csv_path = WB_DIR / "panama_indicators.csv" if not csv_path.exists(): logger.error(f"Indicators file not found: {csv_path}") return None df = pd.read_csv(csv_path) logger.info(f"Loaded {len(df)} indicator records") # Get most recent year for each indicator latest_df = df.loc[df.groupby('indicator_code')['year'].idxmax()] logger.info(f"Selected most recent data for {len(latest_df)} indicators") return latest_df def create_national_geojson(indicators_df, admin_gdf): """Create GeoJSON for national-level indicators""" OUTPUT_DIR.mkdir(parents=True, exist_ok=True) # Since WB data is national-level, we'll attach it to the country boundary (admin0) # For now, create a simple point feature at Panama's center with the indicators features = [] # Create one feature with all latest indicators properties = { 'country': 'Panama', 'data_year': int(indicators_df['year'].max()) } # Add each indicator as a property for _, row in indicators_df.iterrows(): # Create clean column name (remove special chars) col_name = row['indicator_code'].lower().replace('.', '_') properties[col_name] = row['value'] properties[f"{col_name}_name"] = row['indicator_name'] # Use Panama's approximate center feature = { "type": "Feature", "geometry": { "type": "Point", "coordinates": [-80.0, 8.5] # Approximate center of Panama }, "properties": properties } geojson = { "type": "FeatureCollection", "features": [feature] } # Save GeoJSON output_file = OUTPUT_DIR / "panama_national_indicators.geojson" with open(output_file, 'w') as f: json.dump(geojson, f, indent=2) logger.info(f"Created national indicators GeoJSON: {output_file}") logger.info(f" Indicators included: {len(indicators_df)}") return output_file def update_catalog(geojson_path): """Add the new dataset to catalog.json""" catalog_path = DATA_DIR / "catalog.json" with open(catalog_path, 'r') as f: catalog = json.load(f) # Add new entry catalog["panama_national_indicators"] = { "path": str(geojson_path.relative_to(DATA_DIR)), "description": "National socio-economic indicators from World Bank (2000-2024)", "semantic_description": "Comprehensive national-level statistics for Panama including poverty rates, GDP, unemployment, health expenditure, maternal/child mortality, literacy rates, and school enrollment. Data sourced from World Bank Open Data API. Use this dataset for analyzing Panama's socio-economic development trends over time.", "tags": [ "socioeconomic", "worldbank", "poverty", "gdp", "employment", "health", "education", "national", "panama" ], "data_type": "static", "category": "socioeconomic", "format": "geojson" } with open(catalog_path, 'w') as f: json.dump(catalog, f, indent=2) logger.info("Updated catalog.json") def main(): logger.info("Processing World Bank indicators...") # Load data admin_gdf = load_admin_boundaries() indicators_df = process_indicators() if admin_gdf is None or indicators_df is None: logger.error("Failed to load required data") return # Create GeoJSON geojson_path = create_national_geojson(indicators_df, admin_gdf) # Update catalog update_catalog(geojson_path) logger.info("Processing complete!") if __name__ == "__main__": main()