""" Data Loader Service for Panama Geographic Data Loads GeoJSON files from the data/raw directory and provides query capabilities for the LLM to search and filter features. """ import os import json from typing import List, Dict, Any, Optional from functools import lru_cache class PanamaDataLoader: """ Singleton service to load and query Panama geographic data. Loads data once on first access and caches in memory. """ _instance = None _data_loaded = False # Data storage admin0: List[Dict[str, Any]] = [] # Country admin1: List[Dict[str, Any]] = [] # Provinces (13) admin2: List[Dict[str, Any]] = [] # Districts (76) admin3: List[Dict[str, Any]] = [] # Corregimientos (594) def __new__(cls): if cls._instance is None: cls._instance = super().__new__(cls) return cls._instance def __init__(self): if not PanamaDataLoader._data_loaded: self._load_data() PanamaDataLoader._data_loaded = True def _get_data_path(self) -> str: """Get the path to the data/raw directory.""" # Navigate from backend/services to project root current_dir = os.path.dirname(os.path.abspath(__file__)) project_root = os.path.dirname(os.path.dirname(current_dir)) return os.path.join(project_root, "data", "raw") def _load_geojson(self, filename: str) -> List[Dict[str, Any]]: """Load a GeoJSON file and return its features.""" filepath = os.path.join(self._get_data_path(), filename) if not os.path.exists(filepath): print(f"Warning: {filepath} not found") return [] try: with open(filepath, 'r', encoding='utf-8') as f: data = json.load(f) features = data.get('features', []) print(f" Loaded {len(features)} features from {filename}") return features except Exception as e: print(f"Error loading {filename}: {e}") return [] def _load_data(self): """Load all GeoJSON data files.""" print("=" * 50) print("Loading Panama Geographic Data...") print("=" * 50) self.admin0 = self._load_geojson("pan_admin0.geojson") self.admin1 = self._load_geojson("pan_admin1.geojson") self.admin2 = self._load_geojson("pan_admin2.geojson") self.admin3 = self._load_geojson("pan_admin3.geojson") total = len(self.admin0) + len(self.admin1) + len(self.admin2) + len(self.admin3) print(f"Total features loaded: {total}") print("=" * 50) def get_schema_context(self) -> str: """Return schema description for LLM context.""" return """ Panama Geographic Data (HDX Administrative Boundaries): 1. admin0 (Country Level) - adm0_name: "Panamá" - adm0_pcode: "PA" - area_sqkm: country area in square kilometers - geometry: MultiPolygon 2. admin1 (Provinces - 13 total) - adm1_name: Province name (e.g., "Bocas del Toro", "Panamá", "Colón") - adm1_pcode: Province code (e.g., "PA01", "PA08") - adm0_name: "Panamá" - area_sqkm: province area - center_lat, center_lon: centroid coordinates - geometry: MultiPolygon 3. admin2 (Districts - 76 total) - adm2_name: District name - adm2_pcode: District code (e.g., "PA0101") - adm1_name: Parent province name - adm1_pcode: Parent province code - area_sqkm: district area - center_lat, center_lon: centroid coordinates - geometry: MultiPolygon 4. admin3 (Corregimientos - 594 total) - adm3_name: Corregimiento name - adm3_pcode: Corregimiento code (e.g., "PA010101") - adm2_name: Parent district name - adm2_pcode: Parent district code - adm1_name: Parent province name - area_sqkm: corregimiento area - center_lat, center_lon: centroid coordinates - geometry: MultiPolygon Notes: - All geometries use WGS84 (EPSG:4326) coordinate system - P-codes follow ISO 3166-2 format - Valid as of 2021-10-20 """ def get_data_citations(self, admin_levels: List[str]) -> List[str]: """Return citations for the queried data.""" citations = [] level_names = { "admin0": "Panama Country Boundary", "admin1": "Panama Provinces", "admin2": "Panama Districts", "admin3": "Panama Corregimientos" } for level in admin_levels: if level in level_names: citations.append(f"{level_names[level]} (HDX COD-AB, 2021)") return citations if citations else ["Panama Administrative Boundaries (HDX COD-AB, 2021)"] def search_by_name( self, name: str, admin_level: Optional[str] = None, limit: int = 50 ) -> List[Dict[str, Any]]: """ Search for features by name (case-insensitive partial match). Args: name: Search term admin_level: Optional filter ("admin1", "admin2", "admin3") limit: Maximum results to return """ name_lower = name.lower() results = [] levels_to_search = [] if admin_level: levels_to_search = [(admin_level, getattr(self, admin_level, []))] else: levels_to_search = [ ("admin1", self.admin1), ("admin2", self.admin2), ("admin3", self.admin3) ] for level_name, features in levels_to_search: for feature in features: props = feature.get("properties", {}) # Check various name fields for key in ["adm1_name", "adm2_name", "adm3_name", "adm0_name"]: value = props.get(key, "") if value and name_lower in value.lower(): results.append({ "level": level_name, "feature": feature }) break if len(results) >= limit: break if len(results) >= limit: break return results def get_all_provinces(self) -> List[Dict[str, Any]]: """Get all provinces (admin1).""" return self.admin1 def get_all_districts(self, province_pcode: Optional[str] = None) -> List[Dict[str, Any]]: """Get all districts, optionally filtered by province.""" if province_pcode: return [ f for f in self.admin2 if f.get("properties", {}).get("adm1_pcode") == province_pcode ] return self.admin2 def get_all_corregimientos( self, district_pcode: Optional[str] = None, province_pcode: Optional[str] = None ) -> List[Dict[str, Any]]: """Get all corregimientos, optionally filtered.""" results = self.admin3 if district_pcode: results = [ f for f in results if f.get("properties", {}).get("adm2_pcode") == district_pcode ] elif province_pcode: results = [ f for f in results if f.get("properties", {}).get("adm1_pcode") == province_pcode ] return results def get_by_pcode(self, pcode: str) -> Optional[Dict[str, Any]]: """Get a feature by its P-code.""" pcode_upper = pcode.upper() # Determine level by P-code length if len(pcode_upper) == 2: # Country for f in self.admin0: if f.get("properties", {}).get("adm0_pcode") == pcode_upper: return f elif len(pcode_upper) == 4: # Province for f in self.admin1: if f.get("properties", {}).get("adm1_pcode") == pcode_upper: return f elif len(pcode_upper) == 6: # District for f in self.admin2: if f.get("properties", {}).get("adm2_pcode") == pcode_upper: return f elif len(pcode_upper) == 8: # Corregimiento for f in self.admin3: if f.get("properties", {}).get("adm3_pcode") == pcode_upper: return f return None def to_geojson(self, features: List[Dict[str, Any]]) -> Dict[str, Any]: """Convert a list of features to a GeoJSON FeatureCollection.""" # Handle both raw features and wrapped results from search clean_features = [] for f in features: if "feature" in f: clean_features.append(f["feature"]) else: clean_features.append(f) return { "type": "FeatureCollection", "features": clean_features } # Singleton instance _data_loader: Optional[PanamaDataLoader] = None def get_data_loader() -> PanamaDataLoader: """Get the singleton data loader instance.""" global _data_loader if _data_loader is None: _data_loader = PanamaDataLoader() return _data_loader