|
|
|
|
|
""" |
|
|
Create province-level socio-economic layer for Panama |
|
|
Uses known data from research (MPI, Census highlights) joined to admin boundaries |
|
|
""" |
|
|
|
|
|
import geopandas as gpd |
|
|
import pandas as pd |
|
|
from pathlib import Path |
|
|
import logging |
|
|
import json |
|
|
|
|
|
# Configure root logging at INFO so the progress messages below are shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# "data" folder located one level above the directory containing this script.
DATA_DIR = Path(__file__).parent.parent.joinpath("data")
# Input folder holding the admin boundary file.
BASE_DIR = DATA_DIR.joinpath("base")
# Output folder that receives the generated socioeconomic layer.
OUTPUT_DIR = DATA_DIR.joinpath("socioeconomic")
|
|
|
|
|
|
|
|
|
|
|
# Socioeconomic indicators keyed by province/comarca name.
# Every entry carries "mpi_poverty_pct" and "population_2023"; the other keys
# ("avg_income_pab", "disability_rate", "note") are optional and only present
# where listed below.
# NOTE(review): "avg_income_pab" presumably is an average income in balboas
# (PAB) and "disability_rate" a percentage — confirm units against the source.
PROVINCE_DATA = {
    "Bocas del Toro": {
        "mpi_poverty_pct": 75.0,
        "population_2023": 159228,
        "avg_income_pab": 383.14,
        "disability_rate": 3.21,
    },
    "Coclé": {"mpi_poverty_pct": 35.0, "population_2023": 278000},
    "Colón": {"mpi_poverty_pct": 40.0, "population_2023": 283000},
    "Chiriquí": {"mpi_poverty_pct": 30.0, "population_2023": 498000},
    "Darién": {"mpi_poverty_pct": 65.0, "population_2023": 57000},
    "Herrera": {"mpi_poverty_pct": 25.0, "population_2023": 123000},
    "Los Santos": {"mpi_poverty_pct": 22.0, "population_2023": 97000},
    "Panamá": {"mpi_poverty_pct": 15.0, "population_2023": 2100000},
    "Panamá Oeste": {"mpi_poverty_pct": 18.0, "population_2023": 550000},
    "Veraguas": {"mpi_poverty_pct": 45.0, "population_2023": 261000},
    # The remaining entries are the ones the name-alias table in
    # create_province_layer() treats as comarcas.
    "Ngäbe-Buglé": {
        "mpi_poverty_pct": 93.4,
        "population_2023": 201000,
        "note": "Highest multidimensional poverty in Panama",
    },
    "Guna Yala": {
        "mpi_poverty_pct": 91.4,
        "population_2023": 38000,
        "note": "Second highest poverty",
    },
    "Emberá-Wounaan": {"mpi_poverty_pct": 85.0, "population_2023": 10000},
}
|
|
|
|
|
def load_admin1():
    """Read the province boundary file from the base data folder.

    Returns:
        Whatever ``geopandas.read_file`` yields for
        ``BASE_DIR / "pan_admin1.geojson"`` (one row per boundary feature).
    """
    boundaries = gpd.read_file(BASE_DIR / "pan_admin1.geojson")
    logger.info(f"Loaded {len(boundaries)} province boundaries")
    return boundaries
|
|
|
|
|
def create_province_layer():
    """Create GeoJSON with province-level socioeconomic data.

    Joins the indicators in ``PROVINCE_DATA`` to the boundaries returned by
    ``load_admin1()`` on the province name and writes the result to
    ``OUTPUT_DIR / "province_socioeconomic.geojson"``.

    Names on both sides are stripped of surrounding whitespace and converted
    to Unicode NFC before comparison, so accented names such as "Coclé" match
    regardless of whether the boundary file stores them precomposed or
    decomposed. Known alternative spellings are mapped to the keys used in
    ``PROVINCE_DATA``.

    Returns:
        pathlib.Path: Path of the GeoJSON file that was written.

    Raises:
        KeyError: If the boundary data lacks the ``adm1_name``,
            ``adm1_pcode`` or ``area_sqkm`` columns.
    """
    # Function-scope import keeps this block self-contained.
    import unicodedata

    def _nfc(value):
        """Return *value* in NFC form; non-strings (e.g. NaN) pass through."""
        if isinstance(value, str):
            return unicodedata.normalize("NFC", value)
        return value

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    admin_gdf = load_admin1()

    # Naming the indicator columns explicitly guarantees they exist in the
    # frame even if no record in PROVINCE_DATA happens to carry one of them.
    indicator_columns = [
        'mpi_poverty_pct', 'population_2023', 'avg_income_pab',
        'disability_rate', 'note',
    ]
    data_df = pd.DataFrame(
        [
            {"province_name": _nfc(province_name), **data}
            for province_name, data in PROVINCE_DATA.items()
        ],
        columns=["province_name", *indicator_columns],
    )
    logger.info(f"Created data for {len(data_df)} provinces")

    admin_gdf['province_clean'] = admin_gdf['adm1_name'].str.strip().map(_nfc)

    # Alternative spellings seen in boundary data -> key used in PROVINCE_DATA.
    name_mapping = {
        "Ngöbe-Buglé": "Ngäbe-Buglé",
        "Ngöbe Buglé": "Ngäbe-Buglé",
        "Comarca Ngöbe-Buglé": "Ngäbe-Buglé",
        # Added for parity with the "Comarca <name>" aliases below.
        "Comarca Ngäbe-Buglé": "Ngäbe-Buglé",
        "Kuna Yala": "Guna Yala",
        "Comarca Guna Yala": "Guna Yala",
        "Comarca Kuna Yala": "Guna Yala",
        "Emberá": "Emberá-Wounaan",
        "Comarca Emberá-Wounaan": "Emberá-Wounaan",
        "Comarca Emberá": "Emberá-Wounaan",
    }
    name_mapping = {_nfc(alias): _nfc(name) for alias, name in name_mapping.items()}

    admin_gdf['province_match'] = admin_gdf['province_clean'].replace(name_mapping)

    # Left join: every boundary is kept, with NaN indicators when unmatched.
    merged_gdf = admin_gdf.merge(
        data_df,
        left_on='province_match',
        right_on='province_name',
        how='left',
    )

    matched = merged_gdf['mpi_poverty_pct'].notna().sum()
    logger.info(f"Successfully joined {matched}/{len(merged_gdf)} provinces")

    if matched < len(merged_gdf):
        unmatched = merged_gdf[merged_gdf['mpi_poverty_pct'].isna()]['adm1_name'].tolist()
        logger.warning(f"Unmatched provinces: {unmatched}")

    # Also surface the opposite failure: data entries whose name matched no
    # boundary would otherwise be dropped from the output without notice.
    unused = sorted(
        set(data_df['province_name']) - set(merged_gdf['province_match'].dropna())
    )
    if unused:
        logger.warning(f"Data entries without a matching boundary: {unused}")

    # Keep only the published columns; the helper join columns are dropped.
    output_gdf = merged_gdf[[
        'adm1_name', 'adm1_pcode', 'area_sqkm',
        *indicator_columns,
        'geometry',
    ]].copy()

    output_file = OUTPUT_DIR / "province_socioeconomic.geojson"
    output_gdf.to_file(output_file, driver='GeoJSON')

    logger.info(f"Created province layer: {output_file}")
    logger.info(f" - {matched} provinces with MPI data")
    logger.info(f" - {output_gdf['population_2023'].notna().sum()} with population")

    return output_file
|
|
|
|
|
def update_catalog(geojson_path):
    """Register the province layer in ``DATA_DIR / "catalog.json"``.

    Loads the existing catalog, adds or overwrites the
    ``"province_socioeconomic"`` entry and writes the catalog back with
    two-space indentation.

    Args:
        geojson_path: ``pathlib.Path`` of the layer file. It must lie inside
            ``DATA_DIR`` because it is stored relative to that folder.

    Raises:
        FileNotFoundError: If ``catalog.json`` does not exist.
        ValueError: If ``geojson_path`` is not located under ``DATA_DIR``.
        json.JSONDecodeError: If the existing catalog is not valid JSON.
    """
    catalog_path = DATA_DIR / "catalog.json"

    # Read as UTF-8 explicitly: the platform default encoding would garble
    # non-ASCII text already stored in the catalog on some systems.
    with open(catalog_path, 'r', encoding='utf-8') as f:
        catalog = json.load(f)

    catalog["province_socioeconomic"] = {
        # as_posix() keeps forward slashes so the stored path is identical on
        # every operating system.
        "path": geojson_path.relative_to(DATA_DIR).as_posix(),
        "description": "Province-level socioeconomic indicators for Panama (2023)",
        "semantic_description": "Socioeconomic data at the province level including Multidimensional Poverty Index (MPI), population from Censo 2023, average income, and disability rates. Shows dramatic geographic inequality: Ngäbe-Buglé comarca has 93.4% poverty vs 15% in Panamá province. Use for analyzing regional disparities in poverty, development, and demographics.",
        "tags": [
            "socioeconomic",
            "poverty",
            "mpi",
            "census",
            "province",
            "admin1",
            "demographics",
            "inequality",
            "panama",
        ],
        "data_type": "static",
        "category": "socioeconomic",
        "format": "geojson",
    }

    with open(catalog_path, 'w', encoding='utf-8') as f:
        json.dump(catalog, f, indent=2)

    logger.info("Updated catalog.json")
|
|
|
|
|
def main():
    """Build the province layer, then record it in the data catalog."""
    logger.info("Creating province socioeconomic layer...")
    # The layer is written first; its path is handed straight to the catalog.
    update_catalog(create_province_layer())
    logger.info("Complete!")
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|