File size: 9,302 Bytes
4851501
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
"""
Data Loader Service for Panama Geographic Data

Loads GeoJSON files from the data/raw directory and provides
query capabilities for the LLM to search and filter features.
"""

import os
import json
from typing import List, Dict, Any, Optional
from functools import lru_cache


class PanamaDataLoader:
    """
    Singleton service to load and query Panama geographic data.

    The GeoJSON boundary files are read from the data/raw directory the
    first time the class is instantiated and the parsed features are kept
    in memory. Every later ``PanamaDataLoader()`` call returns the same
    instance and does not read the files again.
    """

    _instance = None
    _data_loaded = False

    # Names of the feature stores, ordered from country down to corregimiento.
    # Also used as the whitelist for caller-supplied ``admin_level`` values.
    _ADMIN_LEVELS = ("admin0", "admin1", "admin2", "admin3")

    # Levels scanned by search_by_name() when the caller does not pick one.
    _DEFAULT_SEARCH_LEVELS = ("admin1", "admin2", "admin3")

    # Property keys inspected by search_by_name(), in the order they are tried.
    _NAME_KEYS = ("adm1_name", "adm2_name", "adm3_name", "adm0_name")

    # P-code length -> (feature store, property key holding that P-code).
    _PCODE_LOOKUP = {
        2: ("admin0", "adm0_pcode"),  # Country
        4: ("admin1", "adm1_pcode"),  # Province
        6: ("admin2", "adm2_pcode"),  # District
        8: ("admin3", "adm3_pcode"),  # Corregimiento
    }

    # Human-readable dataset titles used when building citations.
    _CITATION_TITLES = {
        "admin0": "Panama Country Boundary",
        "admin1": "Panama Provinces",
        "admin2": "Panama Districts",
        "admin3": "Panama Corregimientos",
    }

    # Data storage (replaced with freshly loaded lists by _load_data)
    admin0: List[Dict[str, Any]] = []  # Country
    admin1: List[Dict[str, Any]] = []  # Provinces (13)
    admin2: List[Dict[str, Any]] = []  # Districts (76)
    admin3: List[Dict[str, Any]] = []  # Corregimientos (594)

    def __new__(cls):
        """Create the shared instance on first use and return it afterwards."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        """Load the data files once; later constructions are no-ops."""
        # __init__ runs on every PanamaDataLoader() call even though __new__
        # hands back the same object, so the class-level flag guards the load.
        if not PanamaDataLoader._data_loaded:
            self._load_data()
            PanamaDataLoader._data_loaded = True

    @staticmethod
    def _properties(feature: Dict[str, Any]) -> Dict[str, Any]:
        """Return a feature's properties, treating a missing or null value as empty."""
        return feature.get("properties") or {}

    @staticmethod
    def _normalize_pcode(pcode: Any) -> Any:
        """Strip and upper-case a P-code string; other values pass through unchanged."""
        if isinstance(pcode, str):
            return pcode.strip().upper()
        return pcode

    @classmethod
    def _matches_name(cls, props: Dict[str, Any], needle: str) -> bool:
        """Tell whether any name property contains ``needle`` (already lower-cased)."""
        for key in cls._NAME_KEYS:
            value = props.get(key)
            # Empty and non-string values can never match and must not crash.
            if isinstance(value, str) and value and needle in value.lower():
                return True
        return False

    def _get_data_path(self) -> str:
        """Get the path to the data/raw directory."""
        # Navigate from backend/services to project root: two directory
        # levels above the directory that contains this file.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(os.path.dirname(current_dir))
        return os.path.join(project_root, "data", "raw")

    def _load_geojson(self, filename: str) -> List[Dict[str, Any]]:
        """
        Load a GeoJSON file and return its features.

        Loading is best-effort: a missing, unreadable or malformed file is
        reported on stdout and yields an empty list instead of raising.

        Args:
            filename: File name inside the data/raw directory

        Returns:
            The list stored under the file's "features" key, or [] on failure.
        """
        filepath = os.path.join(self._get_data_path(), filename)

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError:
            print(f"Warning: {filepath} not found")
            return []
        except (OSError, ValueError) as e:
            # ValueError covers both invalid JSON and undecodable bytes.
            print(f"Error loading {filename}: {e}")
            return []

        features = data.get('features', []) if isinstance(data, dict) else None
        if not isinstance(features, list):
            print(f"Error loading {filename}: expected an object with a 'features' list")
            return []

        print(f"  Loaded {len(features)} features from {filename}")
        return features

    def _load_data(self):
        """Load all GeoJSON data files."""
        banner = "=" * 50
        print(banner)
        print("Loading Panama Geographic Data...")
        print(banner)

        # Files are named after the store they fill: pan_admin0.geojson, ...
        for level in self._ADMIN_LEVELS:
            setattr(self, level, self._load_geojson(f"pan_{level}.geojson"))

        total = sum(len(getattr(self, level)) for level in self._ADMIN_LEVELS)
        print(f"Total features loaded: {total}")
        print(banner)

    def get_schema_context(self) -> str:
        """Return schema description for LLM context."""
        return """
Panama Geographic Data (HDX Administrative Boundaries):

1. admin0 (Country Level)
   - adm0_name: "Panamá"
   - adm0_pcode: "PA"
   - area_sqkm: country area in square kilometers
   - geometry: MultiPolygon

2. admin1 (Provinces - 13 total)
   - adm1_name: Province name (e.g., "Bocas del Toro", "Panamá", "Colón")
   - adm1_pcode: Province code (e.g., "PA01", "PA08")
   - adm0_name: "Panamá"
   - area_sqkm: province area
   - center_lat, center_lon: centroid coordinates
   - geometry: MultiPolygon

3. admin2 (Districts - 76 total)
   - adm2_name: District name
   - adm2_pcode: District code (e.g., "PA0101")
   - adm1_name: Parent province name
   - adm1_pcode: Parent province code
   - area_sqkm: district area
   - center_lat, center_lon: centroid coordinates
   - geometry: MultiPolygon

4. admin3 (Corregimientos - 594 total)
   - adm3_name: Corregimiento name
   - adm3_pcode: Corregimiento code (e.g., "PA010101")
   - adm2_name: Parent district name
   - adm2_pcode: Parent district code
   - adm1_name: Parent province name
   - area_sqkm: corregimiento area
   - center_lat, center_lon: centroid coordinates
   - geometry: MultiPolygon

Notes:
- All geometries use WGS84 (EPSG:4326) coordinate system
- P-codes follow ISO 3166-2 format
- Valid as of 2021-10-20
"""

    def get_data_citations(self, admin_levels: List[str]) -> List[str]:
        """
        Return citations for the queried data.

        Args:
            admin_levels: Level names ("admin0" .. "admin3"); unknown names
                are ignored and repeated names are cited only once

        Returns:
            One citation per recognised level in first-seen order, or a
            single generic citation when no level is recognised.
        """
        # dict.fromkeys drops duplicates while keeping the caller's order.
        citations = [
            f"{self._CITATION_TITLES[level]} (HDX COD-AB, 2021)"
            for level in dict.fromkeys(admin_levels or [])
            if level in self._CITATION_TITLES
        ]
        return citations if citations else ["Panama Administrative Boundaries (HDX COD-AB, 2021)"]

    def search_by_name(
        self,
        name: str,
        admin_level: Optional[str] = None,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """
        Search for features by name (case-insensitive partial match).

        A feature matches when any of its adm0-adm3 name properties contains
        the search term, so a feature is also found through the name of a
        parent unit stored on it.

        Args:
            name: Search term
            admin_level: Optional filter ("admin0", "admin1", "admin2",
                "admin3"); any other value yields no results. When omitted,
                admin1, admin2 and admin3 are searched in that order.
            limit: Maximum results to return; values <= 0 return nothing

        Returns:
            A list of {"level": <level name>, "feature": <GeoJSON feature>}.
        """
        if limit <= 0:
            return []

        needle = name.lower()

        if admin_level:
            # Only known stores may be looked up by name: an arbitrary string
            # could otherwise resolve to a method or another attribute.
            levels = (admin_level,) if admin_level in self._ADMIN_LEVELS else ()
        else:
            levels = self._DEFAULT_SEARCH_LEVELS

        results = []
        for level_name in levels:
            for feature in getattr(self, level_name):
                if self._matches_name(self._properties(feature), needle):
                    results.append({
                        "level": level_name,
                        "feature": feature
                    })
                    if len(results) >= limit:
                        return results

        return results

    def get_all_provinces(self) -> List[Dict[str, Any]]:
        """Get all provinces (admin1). The returned list is the loader's own cache."""
        return self.admin1

    def get_all_districts(self, province_pcode: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Get all districts, optionally filtered by province.

        Args:
            province_pcode: Parent province P-code; surrounding whitespace
                and letter case are ignored

        Returns:
            The matching districts, or the full cached admin2 list when no
            province is given.
        """
        if not province_pcode:
            return self.admin2

        wanted = self._normalize_pcode(province_pcode)
        return [
            f for f in self.admin2
            if self._properties(f).get("adm1_pcode") == wanted
        ]

    def get_all_corregimientos(
        self,
        district_pcode: Optional[str] = None,
        province_pcode: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """
        Get all corregimientos, optionally filtered.

        Args:
            district_pcode: Parent district P-code; takes precedence when
                both filters are supplied
            province_pcode: Parent province P-code; used only when no
                district is given

        Returns:
            The matching corregimientos, or the full cached admin3 list when
            neither filter is given. P-code comparison ignores surrounding
            whitespace and letter case of the argument.
        """
        if district_pcode:
            key, wanted = "adm2_pcode", self._normalize_pcode(district_pcode)
        elif province_pcode:
            key, wanted = "adm1_pcode", self._normalize_pcode(province_pcode)
        else:
            return self.admin3

        return [
            f for f in self.admin3
            if self._properties(f).get(key) == wanted
        ]

    def get_by_pcode(self, pcode: str) -> Optional[Dict[str, Any]]:
        """
        Get a feature by its P-code.

        The administrative level is chosen from the code's length
        (2 = country, 4 = province, 6 = district, 8 = corregimiento).

        Args:
            pcode: P-code to look up; surrounding whitespace and letter case
                are ignored

        Returns:
            The first feature carrying that P-code, or None when the value
            is not a string, has an unsupported length, or is not found.
        """
        if not isinstance(pcode, str):
            return None

        wanted = self._normalize_pcode(pcode)
        target = self._PCODE_LOOKUP.get(len(wanted))
        if target is None:
            return None

        store, key = target
        for f in getattr(self, store):
            if self._properties(f).get(key) == wanted:
                return f

        return None

    def to_geojson(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Convert a list of features to a GeoJSON FeatureCollection.

        Args:
            features: Raw features and/or wrapped search results (dicts with
                a "feature" key, as produced by search_by_name)

        Returns:
            A dict with "type": "FeatureCollection" and the unwrapped features.
        """
        # Handle both raw features and wrapped results from search
        clean_features = [
            item["feature"] if "feature" in item else item
            for item in features
        ]

        return {
            "type": "FeatureCollection",
            "features": clean_features
        }


# Module-level handle to the shared loader; filled in lazily on first request
_data_loader: Optional[PanamaDataLoader] = None


def get_data_loader() -> PanamaDataLoader:
    """Return the shared PanamaDataLoader, constructing it on the first call."""
    global _data_loader
    # Fast path: the loader was already created by an earlier call.
    if _data_loader is not None:
        return _data_loader
    _data_loader = PanamaDataLoader()
    return _data_loader