# File size: 4,719 Bytes
# 4851501
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python3
"""
Process World Bank indicators and create GeoJSON layers
Joins most recent indicator data to Panama administrative boundaries
"""

import pandas as pd
import geopandas as gpd
from pathlib import Path
import logging
import json

# Configure logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# All data paths are resolved relative to this script: <script dir>/../data
DATA_DIR = Path(__file__).parent.parent / "data"
# Input: World Bank indicator CSV (panama_indicators.csv) is read from here.
WB_DIR = DATA_DIR / "worldbank"
# Input: administrative boundary file (pan_admin1.geojson) is read from here.
BASE_DIR = DATA_DIR / "base"
# Output: the generated national indicators GeoJSON is written here.
OUTPUT_DIR = DATA_DIR / "socioeconomic"

def load_admin_boundaries():
    """Read the Panama admin-1 boundaries file into a GeoDataFrame.

    Returns:
        The GeoDataFrame read from ``pan_admin1.geojson`` under ``BASE_DIR``,
        or ``None`` (after logging an error) when that file does not exist.
    """
    boundaries_file = BASE_DIR / "pan_admin1.geojson"

    if boundaries_file.exists():
        provinces = gpd.read_file(boundaries_file)
        logger.info(f"Loaded {len(provinces)} provinces")
        return provinces

    # Missing input is reported to the caller as None rather than raised.
    logger.error(f"Admin boundaries not found: {boundaries_file}")
    return None

def process_indicators():
    """Load the World Bank indicators CSV and keep the latest usable row per indicator.

    Reads ``panama_indicators.csv`` from ``WB_DIR``. Rows without an
    ``indicator_code``, ``year`` or ``value`` are ignored, so the row selected
    for each indicator is the most recent one that actually carries an
    observation (the newest years of an indicator are often still empty).
    Indicators that have no observation at all are left out of the result.

    Returns:
        A DataFrame with one row per ``indicator_code`` (the row with the
        highest ``year`` among usable rows), or ``None`` (after logging an
        error) when the CSV file does not exist.
    """
    csv_path = WB_DIR / "panama_indicators.csv"

    if not csv_path.exists():
        logger.error(f"Indicators file not found: {csv_path}")
        return None

    df = pd.read_csv(csv_path)
    logger.info(f"Loaded {len(df)} indicator records")

    # Discard rows that cannot contribute a dated observation; otherwise
    # idxmax() would happily pick a newer row whose value is missing.
    usable_df = df.dropna(subset=['indicator_code', 'year', 'value'])
    skipped = len(df) - len(usable_df)
    if skipped:
        logger.info(f"Ignored {skipped} records without a year or value")

    # Get most recent year for each indicator
    latest_idx = usable_df.groupby('indicator_code')['year'].idxmax()
    latest_df = usable_df.loc[latest_idx]
    logger.info(f"Selected most recent data for {len(latest_df)} indicators")

    return latest_df

def _json_safe(value):
    """Return *value* as a plain JSON-serializable scalar; missing values become None."""
    if pd.isna(value):
        return None
    # NumPy scalars expose .item() to obtain the equivalent built-in Python value.
    return value.item() if hasattr(value, "item") else value


def create_national_geojson(indicators_df, admin_gdf):
    """Create GeoJSON for national-level indicators.

    Writes a FeatureCollection with a single Point feature (an approximate
    centre of Panama) whose properties hold every indicator in
    ``indicators_df``. For an indicator code such as ``A.B.C`` the properties
    ``a_b_c`` (value), ``a_b_c_name`` (indicator name) and ``a_b_c_year``
    (year of that value) are written. ``data_year`` is the highest year across
    all indicators. Missing values are written as JSON ``null`` so the output
    stays valid JSON.

    Args:
        indicators_df: DataFrame with ``indicator_code``, ``indicator_name``,
            ``year`` and ``value`` columns, one row per indicator.
        admin_gdf: Administrative boundaries; currently unused and kept only
            for interface compatibility.

    Returns:
        Path of the GeoJSON file that was written under ``OUTPUT_DIR``.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Since WB data is national-level, it is attached to a single point
    # feature at Panama's centre rather than to the admin boundaries.

    # max() of an empty/all-missing column is NaN, which int() cannot convert.
    latest_year = indicators_df['year'].max()
    properties = {
        'country': 'Panama',
        'data_year': None if pd.isna(latest_year) else int(latest_year)
    }

    # Add each indicator as a property
    for _, row in indicators_df.iterrows():
        # Create clean column name (remove special chars)
        col_name = row['indicator_code'].lower().replace('.', '_')
        properties[col_name] = _json_safe(row['value'])
        properties[f"{col_name}_name"] = _json_safe(row['indicator_name'])
        # Indicators can come from different years, so record each one's own year.
        year = row['year']
        properties[f"{col_name}_year"] = None if pd.isna(year) else int(year)

    # Use Panama's approximate center
    feature = {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [-80.0, 8.5]  # Approximate center of Panama
        },
        "properties": properties
    }

    geojson = {
        "type": "FeatureCollection",
        "features": [feature]
    }

    # Save GeoJSON
    output_file = OUTPUT_DIR / "panama_national_indicators.geojson"
    with open(output_file, 'w') as f:
        json.dump(geojson, f, indent=2)

    logger.info(f"Created national indicators GeoJSON: {output_file}")
    logger.info(f"  Indicators included: {len(indicators_df)}")

    return output_file

def update_catalog(geojson_path):
    """Add (or overwrite) the national indicators entry in catalog.json.

    Reads ``catalog.json`` from ``DATA_DIR``, sets the
    ``panama_national_indicators`` entry and writes the catalog back. The
    catalog is read and written as UTF-8 regardless of the platform's locale
    encoding, and the dataset path is stored with forward slashes so the
    entry is the same on every operating system.

    Args:
        geojson_path: Path of the generated GeoJSON file; must be located
            under ``DATA_DIR`` because it is stored relative to it.

    Raises:
        FileNotFoundError: If ``catalog.json`` does not exist.
        ValueError: If ``geojson_path`` is not inside ``DATA_DIR``.
    """
    catalog_path = DATA_DIR / "catalog.json"

    with open(catalog_path, 'r', encoding='utf-8') as f:
        catalog = json.load(f)

    # Add new entry
    catalog["panama_national_indicators"] = {
        # as_posix() keeps the stored path portable (no backslashes on Windows).
        "path": geojson_path.relative_to(DATA_DIR).as_posix(),
        "description": "National socio-economic indicators from World Bank (2000-2024)",
        "semantic_description": "Comprehensive national-level statistics for Panama including poverty rates, GDP, unemployment, health expenditure, maternal/child mortality, literacy rates, and school enrollment. Data sourced from World Bank Open Data API. Use this dataset for analyzing Panama's socio-economic development trends over time.",
        "tags": [
            "socioeconomic",
            "worldbank",
            "poverty",
            "gdp",
            "employment",
            "health",
            "education",
            "national",
            "panama"
        ],
        "data_type": "static",
        "category": "socioeconomic",
        "format": "geojson"
    }

    with open(catalog_path, 'w', encoding='utf-8') as f:
        json.dump(catalog, f, indent=2)

    logger.info("Updated catalog.json")

def main():
    """Run the pipeline: load inputs, write the national GeoJSON, update the catalog."""
    logger.info("Processing World Bank indicators...")

    # Both loaders are always invoked before the check, so each one gets the
    # chance to log its own "not found" error.
    boundaries = load_admin_boundaries()
    latest_indicators = process_indicators()

    inputs_ready = boundaries is not None and latest_indicators is not None
    if inputs_ready:
        # Write the GeoJSON layer, then register the written file in the catalog.
        update_catalog(create_national_geojson(latest_indicators, boundaries))
        logger.info("Processing complete!")
    else:
        logger.error("Failed to load required data")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()