GerardCB committed on
Commit
db450fb
·
1 Parent(s): 6414190

Fix: Remove build-time download script

Browse files
backend/scripts/download_hdx_panama.py DELETED
@@ -1,105 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Download Panama-specific datasets from HDX
4
- """
5
-
6
- import requests
7
- import geopandas as gpd
8
- from pathlib import Path
9
- import logging
10
- import zipfile
11
- import io
12
-
13
# Emit INFO-level progress messages when the script runs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Downloads are stored two directory levels above this file, under data/hdx.
DATA_DIR = Path(__file__).parent.parent.joinpath("data", "hdx")

# HDX resources for Panama, keyed by dataset name. Each entry carries the
# download URL and a human-readable description used in log output.
HDX_DATASETS = dict(
    waterways=dict(
        url="https://data.humdata.org/dataset/9b925ead-6034-4ce8-92d9-45d3a1ece1fc/resource/e0dd9e95-5b04-4a5c-b7ef-31a2ea046e1c/download/hotosm_pan_waterways_lines_geojson.zip",
        description="Panama Waterways from OpenStreetMap",
    ),
    road_surface=dict(
        url="https://data.humdata.org/dataset/c55bf26a-eba6-402d-b004-8c4af8c24b39/resource/c03fa6cc-e698-4c10-8b05-77de91e13e86/download/panama_roads.geojson",
        description="Panama Road Surface Data (AI-predicted paved/unpaved)",
    ),
    admin_3=dict(
        url="https://data.humdata.org/dataset/d188544c-352b-419b-a489-0ae6b763bf21/resource/119d6756-749e-4e4f-bf3a-9694ce22df0a/download/pan_admin3_2021.geojson",
        description="Panama Admin 3 (Corregimientos) Boundaries",
    ),
    admin_lines=dict(
        url="https://data.humdata.org/dataset/d188544c-352b-419b-a489-0ae6b763bf21/resource/d7981358-867c-4034-aa1e-07d0f419c968/download/pan_admin_lines_2021.geojson",
        description="Panama Admin Lines",
    ),
)
37
-
38
def download_and_extract_hdx(dataset_name, url, description):
    """Download one HDX dataset and report where its GeoJSON was stored.

    URLs ending in ``.zip`` are treated as archives: they are extracted into
    ``DATA_DIR / dataset_name`` and the first ``*.geojson`` file (in sorted
    order) found directly in that directory is loaded. Any other URL is
    treated as a direct GeoJSON download. The ``admin_3`` and ``admin_lines``
    datasets are written to the sibling ``base`` directory under fixed file
    names; every other dataset is written to
    ``DATA_DIR / dataset_name / <dataset_name>.geojson``.

    Args:
        dataset_name: Dataset key; also used for directory and file names.
        url: Download URL of the resource.
        description: Human-readable label used in the progress log line.

    Returns:
        A ``(geojson_path, feature_count)`` tuple on success, or
        ``(None, 0)`` when the download, extraction, or load fails, or when
        a ZIP archive contains no GeoJSON file.
    """
    logger.info(f"Downloading {description}...")

    output_dir = DATA_DIR / dataset_name
    output_dir.mkdir(parents=True, exist_ok=True)

    try:
        response = requests.get(url, timeout=60)
        response.raise_for_status()

        if url.endswith('.zip'):
            # Archive: unpack from memory, then locate the GeoJSON payload.
            with zipfile.ZipFile(io.BytesIO(response.content)) as z:
                z.extractall(output_dir)
            logger.info(f"Extracted ZIP to {output_dir}")

            # Sort so the chosen file does not depend on directory order.
            geojson_files = sorted(output_dir.glob("*.geojson"))
            if not geojson_files:
                # Previously this path fell off the end of the function and
                # returned a bare None, which broke tuple unpacking in the
                # caller. Report it as a normal failure instead.
                logger.error(
                    f"Failed to download {dataset_name}: "
                    f"no GeoJSON file found in {output_dir}"
                )
                return None, 0

            geojson_path = geojson_files[0]
            gdf = gpd.read_file(geojson_path)
            logger.info(f"Loaded {len(gdf)} features from {geojson_path.name}")
            return geojson_path, len(gdf)

        # Direct GeoJSON: the admin datasets live in the shared "base"
        # directory under fixed names; everything else uses the default.
        if dataset_name == "admin_3":
            output_dir = DATA_DIR.parent / "base"
            geojson_path = output_dir / "pan_admin3.geojson"
        elif dataset_name == "admin_lines":
            output_dir = DATA_DIR.parent / "base"
            geojson_path = output_dir / "pan_adminlines.geojson"
        else:
            geojson_path = output_dir / f"{dataset_name}.geojson"

        # Ensure directory exists (critical for Docker build)
        output_dir.mkdir(parents=True, exist_ok=True)
        geojson_path.write_bytes(response.content)

        gdf = gpd.read_file(geojson_path)
        logger.info(f"Loaded {len(gdf)} features")
        return geojson_path, len(gdf)

    except Exception as e:
        # Best-effort boundary: one failed dataset must not abort the others.
        logger.error(f"Failed to download {dataset_name}: {e}")
        return None, 0
88
-
89
def main():
    """Download every dataset in ``HDX_DATASETS`` and log a summary.

    Returns:
        A list with one dict per successfully downloaded dataset, holding
        its ``dataset`` name, feature ``count`` and file ``path``.
    """
    logger.info("=== Downloading HDX Panama Datasets ===")

    summary = []
    for dataset_key in HDX_DATASETS:
        spec = HDX_DATASETS[dataset_key]
        saved_path, feature_total = download_and_extract_hdx(
            dataset_key, spec["url"], spec["description"]
        )
        # Failed downloads come back with a falsy path; leave them out.
        if not saved_path:
            continue
        summary.append(
            dict(dataset=dataset_key, count=feature_total, path=saved_path)
        )

    logger.info("\n=== Download Summary ===")
    for entry in summary:
        logger.info(f" {entry['dataset']}: {entry['count']} features")

    return summary
103
-
104
- if __name__ == "__main__":
105
- main()