davidtran999 committed on
Commit
0821b38
·
verified ·
1 Parent(s): 66cb7c5

Upload backend/scripts/build_faiss_index.py with huggingface_hub

Browse files
backend/scripts/build_faiss_index.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Script to build FAISS indexes for all models.
3
+ """
4
+ import argparse
5
+ import os
6
+ import sys
7
+ from pathlib import Path
8
+
9
# This script lives at backend/scripts/<file>, so two levels above its
# directory is the repository root.
ROOT_DIR = Path(__file__).resolve().parents[2]
BACKEND_DIR = ROOT_DIR / "backend"
HUE_PORTAL_DIR = BACKEND_DIR / "hue_portal"

# Make the project importable before Django is configured.
#   * BACKEND_DIR lets Django import ``hue_portal.hue_portal.settings``.
#   * ROOT_DIR is added for any other top-level imports.
# HUE_PORTAL_DIR is deliberately NOT added: putting the package directory
# itself on sys.path breaks the ``hue_portal`` package imports.
# Each entry is inserted at position 0, so the directory handled last
# (ROOT_DIR) ends up first on sys.path.
for _search_root in (BACKEND_DIR, ROOT_DIR):
    _path_entry = str(_search_root)
    if _path_entry not in sys.path:
        sys.path.insert(0, _path_entry)

# Respect an already-exported settings module; otherwise use the project default.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "hue_portal.hue_portal.settings")

# Django must be initialised before any model module is imported.
import django
django.setup()
28
+
29
+ from hue_portal.core.models import (
30
+ Procedure,
31
+ Fine,
32
+ Office,
33
+ Advisory,
34
+ LegalSection,
35
+ )
36
+ from hue_portal.core.faiss_index import build_faiss_index_for_model
37
+
38
+
39
def main():
    """Parse the command line and build a FAISS index for each selected model.

    Options:
        --model       One of ``procedure``, ``fine``, ``office``, ``advisory``,
                      ``legal`` or ``all`` (default: ``all``).
        --index-type  One of ``Flat``, ``IVF`` or ``HNSW`` (default: ``IVF``).

    A failure while building the index of one model is reported on stdout
    and does not prevent the remaining models from being processed.
    """
    cli = argparse.ArgumentParser(description="Build FAISS indexes for models")
    cli.add_argument(
        "--model",
        choices=["procedure", "fine", "office", "advisory", "legal", "all"],
        default="all",
        help="Which model to process",
    )
    cli.add_argument(
        "--index-type",
        choices=["Flat", "IVF", "HNSW"],
        default="IVF",
        help="Type of FAISS index",
    )
    options = cli.parse_args()

    banner = "=" * 60
    print(banner)
    print("FAISS Index Builder")
    print(banner)

    # Single table mapping each CLI choice to (model class, display name).
    # Insertion order is the processing order used for "all".
    registry = {
        "procedure": (Procedure, "Procedure"),
        "fine": (Fine, "Fine"),
        "office": (Office, "Office"),
        "advisory": (Advisory, "Advisory"),
        "legal": (LegalSection, "LegalSection"),
    }
    # "all" keeps every entry; a specific choice keeps only its own entry.
    targets = [
        entry
        for choice, entry in registry.items()
        if options.model in ("all", choice)
    ]

    for model_class, label in targets:
        try:
            build_faiss_index_for_model(
                model_class, label, index_type=options.index_type
            )
        except Exception as exc:
            # Best effort: report the failure and carry on with the next model.
            print(f"❌ Error building index for {label}: {exc}")

    print("\n" + banner)
    print("Index building complete")
    print(banner)
80
+
81
+
82
+ if __name__ == "__main__":
83
+ main()
84
+