|
|
|
|
|
""" |
|
|
Process World Bank indicators and create GeoJSON layers |
|
|
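Writes the most recent value of each indicator to a single national-level
GeoJSON point feature for Panama. The administrative boundaries are loaded
and required, but are not yet joined to the indicator data.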
|
|
""" |
|
|
|
|
|
import pandas as pd |
|
|
import geopandas as gpd |
|
|
from pathlib import Path |
|
|
import logging |
|
|
import json |
|
|
|
|
|
# Configure root logging once, at import time, so progress messages at INFO
# level and above are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Directory layout: everything lives under "<parent of this file's folder>/data".
DATA_DIR = Path(__file__).parent.parent.joinpath("data")
WB_DIR = DATA_DIR.joinpath("worldbank")          # World Bank indicator CSV input
BASE_DIR = DATA_DIR.joinpath("base")             # administrative boundary input
OUTPUT_DIR = DATA_DIR.joinpath("socioeconomic")  # generated GeoJSON output
|
|
|
|
|
def load_admin_boundaries():
    """Read the Panama admin-1 boundary file into a GeoDataFrame.

    The file is expected at ``BASE_DIR / "pan_admin1.geojson"``.

    Returns:
        The GeoDataFrame produced by ``gpd.read_file``, or ``None`` when the
        file does not exist (an error is logged in that case).
    """
    admin1_path = BASE_DIR.joinpath("pan_admin1.geojson")

    if admin1_path.exists():
        gdf = gpd.read_file(admin1_path)
        logger.info(f"Loaded {len(gdf)} provinces")
        return gdf

    # Missing input is reported to the caller as None rather than raised.
    logger.error(f"Admin boundaries not found: {admin1_path}")
    return None
|
|
|
|
|
def process_indicators():
    """Load the World Bank indicator CSV and keep the latest row per indicator.

    Reads ``WB_DIR / "panama_indicators.csv"``, which must contain at least
    the ``indicator_code`` and ``year`` columns. Rows without a year or,
    when a ``value`` column is present, without a value are ignored, so that
    "most recent" means the most recent year that actually has data instead
    of a newer year whose value is empty.

    Returns:
        A DataFrame with one row per ``indicator_code`` (the row with the
        greatest ``year`` among the usable rows), or ``None`` when the CSV
        file does not exist (an error is logged in that case).
    """
    csv_path = WB_DIR / "panama_indicators.csv"

    if not csv_path.exists():
        logger.error(f"Indicators file not found: {csv_path}")
        return None

    df = pd.read_csv(csv_path)
    logger.info(f"Loaded {len(df)} indicator records")

    # Drop rows that cannot represent a usable observation before picking
    # the latest one; otherwise idxmax may select a row whose value is NaN.
    required = [col for col in ('year', 'value') if col in df.columns]
    reported = df.dropna(subset=required)
    skipped = len(df) - len(reported)
    if skipped:
        logger.info(f"Ignored {skipped} records with missing year or value")

    # idxmax yields, per indicator, the index label of its greatest year.
    latest_idx = reported.groupby('indicator_code')['year'].idxmax()
    latest_df = reported.loc[latest_idx]
    logger.info(f"Selected most recent data for {len(latest_df)} indicators")

    return latest_df
|
|
|
|
|
def _json_safe(value):
    """Return *value* as a plain JSON-serializable scalar (missing -> None)."""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        # json.dump would otherwise emit the bare token NaN, which is not
        # valid JSON and is rejected by strict GeoJSON parsers.
        return None
    # NumPy integer scalars are rejected by json.dump; .item() unwraps any
    # NumPy scalar to the equivalent Python builtin.
    if hasattr(value, "item"):
        return value.item()
    return value


def create_national_geojson(indicators_df, admin_gdf):
    """Write all indicators as properties of one national GeoJSON point.

    Args:
        indicators_df: DataFrame with ``indicator_code``, ``indicator_name``,
            ``value`` and ``year`` columns, one row per indicator.
        admin_gdf: Administrative boundaries. Currently unused; kept so the
            signature stays stable for callers.

    Returns:
        Path of the written file,
        ``OUTPUT_DIR / "panama_national_indicators.geojson"``.

    For every indicator code ``X.Y.Z`` the feature gets the properties
    ``x_y_z`` (value), ``x_y_z_name`` (indicator name) and ``x_y_z_year``
    (year of that value). Missing values are written as JSON ``null``.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # data_year is the newest year across all indicators; it is None when
    # there is no usable year at all (e.g. an empty input frame).
    years = indicators_df['year'].dropna()
    properties = {
        'country': 'Panama',
        'data_year': int(years.max()) if not years.empty else None,
    }

    for _, row in indicators_df.iterrows():
        # "SP.POP.TOTL" -> "sp_pop_totl": lower-case, dots to underscores.
        col_name = row['indicator_code'].lower().replace('.', '_')
        properties[col_name] = _json_safe(row['value'])
        properties[f"{col_name}_name"] = _json_safe(row['indicator_name'])
        # Indicators may be reported for different years, so record the
        # year next to each value instead of relying on data_year alone.
        year = row['year']
        properties[f"{col_name}_year"] = int(year) if pd.notna(year) else None

    feature = {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            # GeoJSON order is [longitude, latitude]; fixed location used for
            # the national feature (presumably near the centre of Panama).
            "coordinates": [-80.0, 8.5],
        },
        "properties": properties,
    }

    geojson = {
        "type": "FeatureCollection",
        "features": [feature],
    }

    output_file = OUTPUT_DIR / "panama_national_indicators.geojson"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(geojson, f, indent=2)

    logger.info(f"Created national indicators GeoJSON: {output_file}")
    logger.info(f"  Indicators included: {len(indicators_df)}")

    return output_file
|
|
|
|
|
def update_catalog(geojson_path):
    """Register the national indicators dataset in ``DATA_DIR / "catalog.json"``.

    Args:
        geojson_path: Location of the generated GeoJSON file (``Path`` or
            string). It must be inside ``DATA_DIR``.

    Raises:
        ValueError: If ``geojson_path`` is not located under ``DATA_DIR``
            (raised by ``Path.relative_to``).

    The entry is stored under the key ``"panama_national_indicators"`` and
    replaces any existing entry with that key. If the catalog file does not
    exist yet, a new catalog containing only this entry is created.
    """
    catalog_path = DATA_DIR / "catalog.json"

    if catalog_path.exists():
        with open(catalog_path, 'r', encoding='utf-8') as f:
            catalog = json.load(f)
    else:
        # First run: start from an empty catalog instead of crashing.
        logger.warning(f"Catalog not found, creating a new one: {catalog_path}")
        catalog = {}

    # Store the path with forward slashes so the catalog reads the same
    # regardless of the operating system that generated it.
    relative_path = Path(geojson_path).relative_to(DATA_DIR).as_posix()

    catalog["panama_national_indicators"] = {
        "path": relative_path,
        "description": "National socio-economic indicators from World Bank (2000-2024)",
        "semantic_description": "Comprehensive national-level statistics for Panama including poverty rates, GDP, unemployment, health expenditure, maternal/child mortality, literacy rates, and school enrollment. Data sourced from World Bank Open Data API. Use this dataset for analyzing Panama's socio-economic development trends over time.",
        "tags": [
            "socioeconomic",
            "worldbank",
            "poverty",
            "gdp",
            "employment",
            "health",
            "education",
            "national",
            "panama",
        ],
        "data_type": "static",
        "category": "socioeconomic",
        "format": "geojson",
    }

    with open(catalog_path, 'w', encoding='utf-8') as f:
        json.dump(catalog, f, indent=2)

    logger.info("Updated catalog.json")
|
|
|
|
|
def main():
    """Run the full pipeline: load inputs, write the GeoJSON, update the catalog.

    Returns:
        ``0`` on success, ``1`` when the administrative boundaries or the
        indicator CSV could not be loaded (nothing is written in that case).
    """
    logger.info("Processing World Bank indicators...")

    # Both loaders report a missing input file by returning None.
    admin_gdf = load_admin_boundaries()
    indicators_df = process_indicators()

    if admin_gdf is None or indicators_df is None:
        logger.error("Failed to load required data")
        return 1

    geojson_path = create_national_geojson(indicators_df, admin_gdf)

    update_catalog(geojson_path)

    logger.info("Processing complete!")
    return 0


if __name__ == "__main__":
    # Propagate the status so a failed run does not exit with code 0.
    raise SystemExit(main())
|
|
|