|
|
"""
Data Loader Service for Panama Geographic Data

Loads GeoJSON files from the data/raw directory and provides
query capabilities for the LLM to search and filter features.
"""
|
|
|
|
|
import os |
|
|
import json |
|
|
from typing import List, Dict, Any, Optional |
|
|
from functools import lru_cache |
|
|
|
|
|
|
|
|
class PanamaDataLoader:
    """
    Singleton service to load and query Panama geographic data.

    The four GeoJSON files (admin0..admin3) are read once, the first time the
    class is instantiated, and the resulting feature lists are kept in memory.
    Every later ``PanamaDataLoader()`` call returns the same instance without
    touching the disk again.

    Attributes:
        admin0: Country-level features.
        admin1: Province features.
        admin2: District features.
        admin3: Corregimiento features.
    """

    _instance = None
    _data_loaded = False

    # The only attribute names callers may select through ``admin_level``.
    _ADMIN_LEVELS = ("admin0", "admin1", "admin2", "admin3")

    # Property keys inspected by search_by_name, in match-priority order.
    _NAME_KEYS = ("adm1_name", "adm2_name", "adm3_name", "adm0_name")

    # P-code length -> (feature list attribute, property key holding the code).
    _PCODE_LEVELS = {
        2: ("admin0", "adm0_pcode"),
        4: ("admin1", "adm1_pcode"),
        6: ("admin2", "adm2_pcode"),
        8: ("admin3", "adm3_pcode"),
    }

    # Class-level defaults so the attributes exist even before loading;
    # _load_data rebinds them on the instance.
    admin0: List[Dict[str, Any]] = []
    admin1: List[Dict[str, Any]] = []
    admin2: List[Dict[str, Any]] = []
    admin3: List[Dict[str, Any]] = []

    def __new__(cls):
        # Create the single shared instance lazily.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every PanamaDataLoader() call, so guard the load
        # with a class-level flag to read the files only once.
        if not PanamaDataLoader._data_loaded:
            self._load_data()
            PanamaDataLoader._data_loaded = True

    @staticmethod
    def _properties(feature: Any) -> Dict[str, Any]:
        """Return a feature's ``properties`` mapping, or ``{}`` if absent.

        GeoJSON allows ``"properties": null``; treating that (and any other
        non-dict value) as an empty mapping keeps lookups from raising.
        """
        props = feature.get("properties") if isinstance(feature, dict) else None
        return props if isinstance(props, dict) else {}

    def _get_data_path(self) -> str:
        """Get the path to the data/raw directory.

        The project root is taken to be two directory levels above the
        directory that contains this file.
        """
        current_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(os.path.dirname(current_dir))
        return os.path.join(project_root, "data", "raw")

    def _load_geojson(self, filename: str) -> List[Dict[str, Any]]:
        """Load a GeoJSON file and return its features.

        Loading is best-effort: a missing, unreadable or malformed file is
        reported on stdout and yields an empty list instead of raising.

        Args:
            filename: File name relative to the data/raw directory.

        Returns:
            The list stored under the file's ``features`` key, or ``[]``.
        """
        filepath = os.path.join(self._get_data_path(), filename)

        # Open directly instead of checking existence first: one code path,
        # and no window in which the file can vanish between check and open.
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (FileNotFoundError, NotADirectoryError):
            print(f"Warning: {filepath} not found")
            return []
        except (OSError, ValueError) as e:
            # ValueError covers both JSON syntax errors and bad UTF-8.
            print(f"Error loading {filename}: {e}")
            return []

        if not isinstance(data, dict):
            print(f"Error loading {filename}: top-level JSON value is not an object")
            return []

        features = data.get("features", [])
        if not isinstance(features, list):
            print(f"Error loading {filename}: 'features' is not a list")
            return []

        print(f" Loaded {len(features)} features from {filename}")
        return features

    def _load_data(self):
        """Load all GeoJSON data files and print a summary to stdout."""
        print("=" * 50)
        print("Loading Panama Geographic Data...")
        print("=" * 50)

        self.admin0 = self._load_geojson("pan_admin0.geojson")
        self.admin1 = self._load_geojson("pan_admin1.geojson")
        self.admin2 = self._load_geojson("pan_admin2.geojson")
        self.admin3 = self._load_geojson("pan_admin3.geojson")

        total = len(self.admin0) + len(self.admin1) + len(self.admin2) + len(self.admin3)
        print(f"Total features loaded: {total}")
        print("=" * 50)

    def get_schema_context(self) -> str:
        """Return schema description for LLM context."""
        return """
Panama Geographic Data (HDX Administrative Boundaries):

1. admin0 (Country Level)
   - adm0_name: "Panamá"
   - adm0_pcode: "PA"
   - area_sqkm: country area in square kilometers
   - geometry: MultiPolygon

2. admin1 (Provinces - 13 total)
   - adm1_name: Province name (e.g., "Bocas del Toro", "Panamá", "Colón")
   - adm1_pcode: Province code (e.g., "PA01", "PA08")
   - adm0_name: "Panamá"
   - area_sqkm: province area
   - center_lat, center_lon: centroid coordinates
   - geometry: MultiPolygon

3. admin2 (Districts - 76 total)
   - adm2_name: District name
   - adm2_pcode: District code (e.g., "PA0101")
   - adm1_name: Parent province name
   - adm1_pcode: Parent province code
   - area_sqkm: district area
   - center_lat, center_lon: centroid coordinates
   - geometry: MultiPolygon

4. admin3 (Corregimientos - 594 total)
   - adm3_name: Corregimiento name
   - adm3_pcode: Corregimiento code (e.g., "PA010101")
   - adm2_name: Parent district name
   - adm2_pcode: Parent district code
   - adm1_name: Parent province name
   - area_sqkm: corregimiento area
   - center_lat, center_lon: centroid coordinates
   - geometry: MultiPolygon

Notes:
- All geometries use WGS84 (EPSG:4326) coordinate system
- P-codes follow ISO 3166-2 format
- Valid as of 2021-10-20
"""

    def get_data_citations(self, admin_levels: List[str]) -> List[str]:
        """Return citations for the queried data.

        Args:
            admin_levels: Level names ("admin0".."admin3"); unknown names are
                ignored and ``None`` is treated as an empty list.

        Returns:
            One citation per recognised level, in input order, or a single
            generic citation when none of the levels is recognised.
        """
        level_names = {
            "admin0": "Panama Country Boundary",
            "admin1": "Panama Provinces",
            "admin2": "Panama Districts",
            "admin3": "Panama Corregimientos",
        }

        citations = [
            f"{level_names[level]} (HDX COD-AB, 2021)"
            for level in (admin_levels or [])
            if level in level_names
        ]
        return citations if citations else ["Panama Administrative Boundaries (HDX COD-AB, 2021)"]

    def _matches_name(self, feature: Any, needle: str) -> bool:
        """Tell whether any name property of *feature* contains *needle*.

        *needle* must already be lower-cased. Empty and non-string property
        values never match.
        """
        props = self._properties(feature)
        for key in self._NAME_KEYS:
            value = props.get(key)
            if value and isinstance(value, str) and needle in value.lower():
                return True
        return False

    def search_by_name(
        self,
        name: str,
        admin_level: Optional[str] = None,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """
        Search for features by name (case-insensitive partial match).

        A feature matches when any of its adm1/adm2/adm3/adm0 name properties
        contains the search term, so a feature is also returned when only one
        of its parent names matches.

        Args:
            name: Search term
            admin_level: Optional filter ("admin0", "admin1", "admin2",
                "admin3"). Any other non-empty value yields no results.
                When omitted, admin1, admin2 and admin3 are searched in order.
            limit: Maximum results to return; values <= 0 return no results.

        Returns:
            A list of ``{"level": <level name>, "feature": <feature>}`` dicts.
        """
        if limit <= 0:
            return []

        if admin_level:
            # Only expose the known feature lists; never resolve arbitrary
            # attribute names (methods, private state) from caller input.
            if admin_level not in self._ADMIN_LEVELS:
                return []
            levels_to_search = [(admin_level, getattr(self, admin_level))]
        else:
            levels_to_search = [
                ("admin1", self.admin1),
                ("admin2", self.admin2),
                ("admin3", self.admin3),
            ]

        needle = name.lower()
        results: List[Dict[str, Any]] = []

        for level_name, features in levels_to_search:
            for feature in features:
                if self._matches_name(feature, needle):
                    results.append({"level": level_name, "feature": feature})
                    if len(results) >= limit:
                        return results

        return results

    def get_all_provinces(self) -> List[Dict[str, Any]]:
        """Get all provinces (admin1)."""
        return self.admin1

    def get_all_districts(self, province_pcode: Optional[str] = None) -> List[Dict[str, Any]]:
        """Get all districts, optionally filtered by province.

        Args:
            province_pcode: When given, keep only districts whose
                ``adm1_pcode`` equals this value exactly.
        """
        if province_pcode:
            return [
                f for f in self.admin2
                if self._properties(f).get("adm1_pcode") == province_pcode
            ]
        return self.admin2

    def get_all_corregimientos(
        self,
        district_pcode: Optional[str] = None,
        province_pcode: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """Get all corregimientos, optionally filtered.

        Args:
            district_pcode: Keep only features whose ``adm2_pcode`` equals
                this value. Takes precedence over ``province_pcode``.
            province_pcode: Keep only features whose ``adm1_pcode`` equals
                this value; ignored when ``district_pcode`` is given.
        """
        if district_pcode:
            key, wanted = "adm2_pcode", district_pcode
        elif province_pcode:
            key, wanted = "adm1_pcode", province_pcode
        else:
            return self.admin3

        return [f for f in self.admin3 if self._properties(f).get(key) == wanted]

    def get_by_pcode(self, pcode: str) -> Optional[Dict[str, Any]]:
        """Get a feature by its P-code.

        The administrative level is chosen from the code's length
        (2, 4, 6 or 8 characters); the comparison is done in upper case.

        Returns:
            The first matching feature, or ``None`` when the code is empty,
            has an unsupported length, or matches nothing.
        """
        if not pcode:
            return None

        pcode_upper = pcode.upper()
        target = self._PCODE_LEVELS.get(len(pcode_upper))
        if target is None:
            return None

        level_attr, code_key = target
        for feature in getattr(self, level_attr):
            if self._properties(feature).get(code_key) == pcode_upper:
                return feature
        return None

    def to_geojson(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Convert a list of features to a GeoJSON FeatureCollection.

        Items that carry a "feature" key (the shape produced by
        search_by_name) are unwrapped; all other items are used as-is.
        """
        clean_features = [f["feature"] if "feature" in f else f for f in features]
        return {
            "type": "FeatureCollection",
            "features": clean_features,
        }
|
|
|
|
|
|
|
|
|
|
|
# Module-level slot for the shared loader; stays None until get_data_loader()
# is called for the first time.
_data_loader: Optional[PanamaDataLoader] = None


def get_data_loader() -> PanamaDataLoader:
    """Return the shared PanamaDataLoader, constructing it on first use."""
    global _data_loader
    if _data_loader is not None:
        return _data_loader

    _data_loader = PanamaDataLoader()
    return _data_loader
|
|
|