"""
Script to extract mapping data from the vietnamadminunits package
and generate data/mapping.json for standalone use.

Usage:
    uv run python scripts/build_mapping.py
"""
| |
|
import json
from collections import Counter
from pathlib import Path
| |
|
| |
|
def _load_json(path):
    """Load a JSON document from *path*, decoding it explicitly as UTF-8.

    The encoding is spelled out because the output of this script is written
    with ``ensure_ascii=False`` (i.e. non-ASCII names are expected); relying
    on the platform default encoding can fail or garble such text on systems
    whose default is not UTF-8.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def _province_names(provinces):
    """Reduce a ``DICT_PROVINCE`` parser table to name/short/code per key."""
    return {
        key: {
            "name": info["province"],
            "short": info["provinceShort"],
            "code": info["provinceCode"],
        }
        for key, info in provinces.items()
    }


def _ward_entry(info):
    """Reduce one parser ward entry to its name/short/type/code fields."""
    return {
        "name": info["ward"],
        "short": info["wardShort"],
        "type": info["wardType"],
        "code": info["wardCode"],
    }


def _describe_old_unit(
    old_compound_key, old_province_names, old_district_names, old_ward_names
):
    """Resolve the ``old_*`` record fields for one legacy compound key.

    The key is split on ``"_"`` at most twice, giving
    ``<province>_<district>`` or ``<province>_<district>_<ward>``.
    NOTE(review): this assumes the province and district keys themselves
    contain no underscore - confirm against the package data.

    Returns:
        ``None`` when the key has fewer than two parts; otherwise a tuple
        ``(fields, old_ward_name)`` where *fields* holds the six ``old_*``
        record entries (missing names become ``""``) and *old_ward_name* is
        the raw looked-up ward name, ``None`` when the ward is unknown.
    """
    parts = old_compound_key.split("_", 2)
    if len(parts) < 2:
        return None

    old_prov_key, old_dist_key = parts[0], parts[1]
    old_dist_info = old_district_names.get(old_prov_key, {}).get(old_dist_key, {})
    # Legacy wards are indexed by the full compound key, not the bare ward key.
    old_ward_info = old_ward_names.get(old_prov_key, {}).get(old_compound_key, {})

    fields = {
        "old_province": old_province_names.get(old_prov_key, {}).get("name", ""),
        "old_province_key": old_prov_key,
        "old_district": old_dist_info.get("name", ""),
        "old_district_key": old_dist_key,
        "old_ward": old_ward_info.get("name", ""),
        "old_ward_key": parts[2] if len(parts) == 3 else "",
    }
    return fields, old_ward_info.get("name")


def build_mapping() -> None:
    """Generate ``data/mapping.json`` from the vietnamadminunits data files.

    Reads ``converter_2025.json``, ``parser_legacy.json`` and
    ``parser_from_2025.json`` from the installed package's ``data`` directory
    and writes a single JSON document (next to this script's parent
    directory, under ``data/``) containing:

    * ``metadata`` - source, version, effective date and record counts;
    * ``province_mapping`` - old province key -> new province key;
    * ``province_names`` / ``old_province_names`` - name/short/code per key;
    * ``ward_mapping`` - one record per (old unit, new ward) pair, with
      ``mapping_type`` of ``unchanged``, ``renamed``, ``merged`` or
      ``divided`` (divided records also carry ``is_default``).

    A short summary is printed to stdout. Returns nothing.

    Raises:
        KeyError: if an expected table or field is missing from the data.
        OSError: if a data file cannot be read or the output cannot be written.
    """
    # Imported inside the function, so importing this module does not
    # require the package to be installed.
    import vietnamadminunits

    data_dir = Path(vietnamadminunits.__file__).parent / "data"
    converter = _load_json(data_dir / "converter_2025.json")
    legacy = _load_json(data_dir / "parser_legacy.json")
    new_parser = _load_json(data_dir / "parser_from_2025.json")

    # The converter lists, per new province, the old provinces folded into it;
    # invert that into old -> new.
    province_mapping = {
        old_key: new_key
        for new_key, old_keys in converter["DICT_PROVINCE"].items()
        for old_key in old_keys
    }

    province_names = _province_names(new_parser["DICT_PROVINCE"])
    old_province_names = _province_names(legacy["DICT_PROVINCE"])

    # New wards: province key -> ward key -> info.
    new_ward_names = {
        prov_key: {ward_key: _ward_entry(info) for ward_key, info in wards.items()}
        for prov_key, wards in new_parser["DICT_PROVINCE_WARD_NO_ACCENTED"].items()
    }

    # Legacy wards are flattened across districts and keyed by the compound
    # "<province>_<district>_<ward>" key, which is the form the converter
    # tables are looked up with below.
    legacy_wards = legacy["DICT_PROVINCE_DISTRICT_WARD_NO_ACCENTED"]
    old_ward_names = {
        prov_key: {
            f"{prov_key}_{dist_key}_{ward_key}": _ward_entry(info)
            for dist_key, wards in districts.items()
            for ward_key, info in wards.items()
        }
        for prov_key, districts in legacy_wards.items()
    }

    # The district table and its fields are treated as optional.
    old_district_names = {
        prov_key: {
            dist_key: {
                "name": info.get("district", ""),
                "short": info.get("districtShort", ""),
                "type": info.get("districtType", ""),
            }
            for dist_key, info in districts.items()
        }
        for prov_key, districts in legacy.get("DICT_PROVINCE_DISTRICT", {}).items()
    }

    ward_mapping = []

    # Non-divided wards: new province -> new ward -> list of old compound keys.
    for new_prov_key, wards in converter["DICT_PROVINCE_WARD_NO_DIVIDED"].items():
        new_prov_name = province_names.get(new_prov_key, {}).get("name", "")
        new_prov_wards = new_ward_names.get(new_prov_key, {})

        for new_ward_key, old_compound_keys in wards.items():
            new_ward_info = new_prov_wards.get(new_ward_key, {})
            # More than one source unit means the new ward is a merger.
            is_single_source = len(old_compound_keys) == 1

            for old_compound_key in old_compound_keys:
                described = _describe_old_unit(
                    old_compound_key,
                    old_province_names,
                    old_district_names,
                    old_ward_names,
                )
                if described is None:
                    continue  # malformed key: not even "<province>_<district>"
                old_fields, old_ward_name = described

                if not is_single_source:
                    mapping_type = "merged"
                elif old_ward_name == new_ward_info.get("name"):
                    # Raw lookups are compared, so two unknown names (both
                    # None) also count as unchanged.
                    mapping_type = "unchanged"
                else:
                    mapping_type = "renamed"

                ward_mapping.append(
                    {
                        **old_fields,
                        "new_province": new_prov_name,
                        "new_province_key": new_prov_key,
                        "new_ward": new_ward_info.get("name", ""),
                        "new_ward_key": new_ward_key,
                        "mapping_type": mapping_type,
                    }
                )

    # Divided wards: new province -> old compound key -> candidate new wards.
    for new_prov_key, old_wards in converter["DICT_PROVINCE_WARD_DIVIDED"].items():
        new_prov_name = province_names.get(new_prov_key, {}).get("name", "")
        new_prov_wards = new_ward_names.get(new_prov_key, {})

        for old_compound_key, new_ward_options in old_wards.items():
            described = _describe_old_unit(
                old_compound_key,
                old_province_names,
                old_district_names,
                old_ward_names,
            )
            if described is None:
                continue
            old_fields, _ = described

            for option in new_ward_options:
                new_ward_key = option["newWardKey"]
                new_ward_info = new_prov_wards.get(new_ward_key, {})
                ward_mapping.append(
                    {
                        **old_fields,
                        "new_province": new_prov_name,
                        "new_province_key": new_prov_key,
                        "new_ward": new_ward_info.get("name", ""),
                        "new_ward_key": new_ward_key,
                        "mapping_type": "divided",
                        "is_default": option.get("isDefaultNewWard", False),
                    }
                )

    mapping = {
        "metadata": {
            "source": "vietnamadminunits",
            # NOTE(review): hard-coded; presumably the vietnamadminunits
            # release the data was extracted from - confirm on upgrade.
            "version": "1.0.4",
            "effective_date": "2025-07-01",
            "old_provinces": len(old_province_names),
            "new_provinces": len(province_names),
            "total_records": len(ward_mapping),
        },
        "province_mapping": province_mapping,
        "province_names": province_names,
        "old_province_names": old_province_names,
        "ward_mapping": ward_mapping,
    }

    output = Path(__file__).parent.parent / "data" / "mapping.json"
    output.parent.mkdir(parents=True, exist_ok=True)
    with open(output, "w", encoding="utf-8") as f:
        json.dump(mapping, f, ensure_ascii=False, indent=2)

    print(f"Generated {output}")
    print(f" Province mappings: {len(province_mapping)} old -> {len(province_names)} new")
    print(f" Ward mapping records: {len(ward_mapping)}")

    # Per-type breakdown, in alphabetical order of mapping type.
    type_counts = Counter(record["mapping_type"] for record in ward_mapping)
    for mapping_type, count in sorted(type_counts.items()):
        print(f" {mapping_type}: {count}")
| |
|
| |
|
# Run the build only when this file is executed as a script; merely importing
# the module does not trigger it.
if __name__ == "__main__":
    build_mapping()
| |
|