File size: 4,719 Bytes
4851501 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
#!/usr/bin/env python3
"""
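Process World Bank indicators and create a GeoJSON layer.

Selects the most recent record of each indicator and attaches them, as
properties, to a single national-level point feature for Panama. The Panama
administrative boundaries are loaded as a required input but are not yet
joined to the indicator data.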
"""
import pandas as pd
import geopandas as gpd
from pathlib import Path
import logging
import json
# Configure the root logger at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# All data lives in the "data" directory located two levels above this file.
DATA_DIR = Path(__file__).parent.parent / "data"
# Input: World Bank indicator CSV (panama_indicators.csv).
WB_DIR = DATA_DIR / "worldbank"
# Input: base layers such as the admin-1 boundaries (pan_admin1.geojson).
BASE_DIR = DATA_DIR / "base"
# Output: directory the generated indicators GeoJSON is written to.
OUTPUT_DIR = DATA_DIR / "socioeconomic"
def load_admin_boundaries():
    """Read Panama's admin-1 boundaries from ``BASE_DIR``.

    Returns:
        The GeoDataFrame read from ``pan_admin1.geojson``, or ``None`` (after
        logging an error) when that file does not exist.
    """
    admin1_path = BASE_DIR / "pan_admin1.geojson"
    gdf = None

    if admin1_path.exists():
        gdf = gpd.read_file(admin1_path)
        logger.info(f"Loaded {len(gdf)} provinces")
    else:
        logger.error(f"Admin boundaries not found: {admin1_path}")

    return gdf
def process_indicators():
    """Load the World Bank indicator CSV and keep the newest observation of each indicator.

    The CSV is expected to provide at least the columns ``indicator_code``,
    ``year`` and ``value``.

    Returns:
        A DataFrame with one row per ``indicator_code`` (the row with the
        highest ``year`` that actually has a ``value``), or ``None`` (after
        logging an error) when the CSV file does not exist.
    """
    csv_path = WB_DIR / "panama_indicators.csv"
    if not csv_path.exists():
        logger.error(f"Indicators file not found: {csv_path}")
        return None

    df = pd.read_csv(csv_path)
    logger.info(f"Loaded {len(df)} indicator records")

    # Rows without a year cannot be ranked, and rows without a value carry no
    # observation. Dropping them first means "most recent" refers to the most
    # recent *available* data point instead of an empty placeholder row, and
    # keeps idxmax from producing a missing label for an all-NaN group.
    observed = df.dropna(subset=['year', 'value'])

    # Get most recent year for each indicator
    latest_idx = observed.groupby('indicator_code')['year'].idxmax()
    latest_df = observed.loc[latest_idx]
    logger.info(f"Selected most recent data for {len(latest_df)} indicators")
    return latest_df
def create_national_geojson(indicators_df, admin_gdf):
    """Write the latest national indicators as a single-feature GeoJSON file.

    Args:
        indicators_df: DataFrame with the columns ``indicator_code``,
            ``indicator_name``, ``year`` and ``value``; one row per indicator.
        admin_gdf: Administrative boundaries. Currently unused; kept so the
            call signature stays stable for callers.

    Returns:
        Path of the written ``panama_national_indicators.geojson`` file.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    def _json_value(value):
        # json.dump writes a float NaN as the bare token ``NaN``, which is not
        # valid JSON/GeoJSON; represent missing data as null instead.
        return None if pd.isna(value) else value

    # Since WB data is national-level, it is attached to one point feature at
    # Panama's approximate center rather than to the admin boundaries.
    # max() of an empty/all-missing column is NaN, which int() cannot convert.
    latest_year = indicators_df['year'].max()
    properties = {
        'country': 'Panama',
        'data_year': None if pd.isna(latest_year) else int(latest_year),
    }

    # Add each indicator as a pair of properties: its value and its name.
    for _, row in indicators_df.iterrows():
        # Create clean property name, e.g. "NY.GDP.MKTP.CD" -> "ny_gdp_mktp_cd"
        col_name = row['indicator_code'].lower().replace('.', '_')
        properties[col_name] = _json_value(row['value'])
        properties[f"{col_name}_name"] = _json_value(row['indicator_name'])

    feature = {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [-80.0, 8.5],  # Approximate center of Panama
        },
        "properties": properties,
    }
    geojson = {
        "type": "FeatureCollection",
        "features": [feature],
    }

    # Save GeoJSON
    output_file = OUTPUT_DIR / "panama_national_indicators.geojson"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(geojson, f, indent=2)

    logger.info(f"Created national indicators GeoJSON: {output_file}")
    logger.info(f" Indicators included: {len(indicators_df)}")
    return output_file
def update_catalog(geojson_path):
    """Add (or overwrite) the national indicators entry in ``catalog.json``.

    Args:
        geojson_path: ``pathlib.Path`` of the generated GeoJSON file. It must
            be located inside ``DATA_DIR`` because the catalog stores the path
            relative to that directory.

    Raises:
        ValueError: If ``geojson_path`` is not inside ``DATA_DIR``.
        json.JSONDecodeError: If an existing catalog file is not valid JSON.
    """
    catalog_path = DATA_DIR / "catalog.json"

    # A missing catalog (e.g. first run) should not abort the pipeline after
    # the GeoJSON has already been written; start a fresh catalog instead.
    try:
        with open(catalog_path, 'r', encoding='utf-8') as f:
            catalog = json.load(f)
    except FileNotFoundError:
        logger.warning(f"Catalog not found, creating a new one: {catalog_path}")
        catalog = {}

    # Add new entry
    catalog["panama_national_indicators"] = {
        # as_posix() keeps forward slashes on every platform so the catalog
        # stays portable; str() would store backslashes on Windows.
        "path": geojson_path.relative_to(DATA_DIR).as_posix(),
        "description": "National socio-economic indicators from World Bank (2000-2024)",
        "semantic_description": "Comprehensive national-level statistics for Panama including poverty rates, GDP, unemployment, health expenditure, maternal/child mortality, literacy rates, and school enrollment. Data sourced from World Bank Open Data API. Use this dataset for analyzing Panama's socio-economic development trends over time.",
        "tags": [
            "socioeconomic",
            "worldbank",
            "poverty",
            "gdp",
            "employment",
            "health",
            "education",
            "national",
            "panama",
        ],
        "data_type": "static",
        "category": "socioeconomic",
        "format": "geojson",
    }

    with open(catalog_path, 'w', encoding='utf-8') as f:
        json.dump(catalog, f, indent=2)
    logger.info("Updated catalog.json")
def main():
    """Run the pipeline: load inputs, write the GeoJSON, register it in the catalog.

    Both inputs are always loaded before they are checked, so every missing
    file is reported. If either one is unavailable, an error is logged and the
    function returns without writing anything.
    """
    logger.info("Processing World Bank indicators...")

    # Load both inputs up front (boundaries first, then indicators).
    boundaries = load_admin_boundaries()
    latest_indicators = process_indicators()

    if any(loaded is None for loaded in (boundaries, latest_indicators)):
        logger.error("Failed to load required data")
        return

    # Write the GeoJSON, then register the resulting file in the catalog.
    written_path = create_national_geojson(latest_indicators, boundaries)
    update_catalog(written_path)

    logger.info("Processing complete!")
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|