abdelrahman-a99 committed
Commit efa9374 · 1 Parent(s): c1b0ea3

Separate rag_core.py (the core logic) from app.py (Gradio UI + API only), import the answer_query function from rag_core, and call it in on_ask

Files changed (3)
  1. app.py +14 -289
  2. rag_core.py +307 -0
  3. requirements.txt +0 -3
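
The practical effect of the split is that the pipeline can now be driven without starting Gradio at all. A minimal smoke test, assuming rag_core.py is on the import path and its index, embedder, and LLM globals initialized successfully at import time (the question string is just an example):

# Hypothetical smoke test of the new module boundary (not part of the commit).
# rag_core builds/loads its FAISS index and models at import time.
from rag_core import answer_query, TOP_K

ans, passages = answer_query("What is the attendance policy?", top_k=TOP_K)
print(ans)  # 1-2 sentence answer, or "Insufficient context"
for p in passages:
    print(p["id"], p.get("page_number"))  # retrieved QA passages with page numbers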
app.py CHANGED
@@ -1,301 +1,25 @@
- import os, json, glob, pickle, re
- from typing import List, Dict
- import faiss
- import torch
  import traceback
  import gradio as gr

- from langdetect import detect
- from unidecode import unidecode
-
- from sentence_transformers import SentenceTransformer
- from huggingface_hub import hf_hub_download
- from llama_cpp import Llama
-
- # ===============================
- # CONFIG
- # ===============================
- # Project paths (can be overridden with Space “Variables” if you like)
- DATA_DIR = os.getenv("DATA_DIR", "./data/pages")
- INDEX_PATH = os.getenv("INDEX_PATH", "./artifacts/policy.index")
- DOC_STORE_PATH = os.getenv("DOC_STORE_PATH", "./artifacts/policy_docs.pkl")
- ARTIFACT_DIR = os.path.dirname(INDEX_PATH) or "."
- os.makedirs(ARTIFACT_DIR, exist_ok=True)
-
- # Embeddings (multilingual e5; remember to prefix "query:" and "passage:")
- EMBED_MODEL = os.getenv("EMBED_MODEL", "intfloat/multilingual-e5-base")
-
- # LLM served on CPU via llama.cpp using a quantized GGUF of Qwen 3B Instruct
- GGUF_REPO_ID = os.getenv("GGUF_REPO_ID", "Qwen/Qwen2.5-3B-Instruct-GGUF")
- GGUF_FILENAME = os.getenv("GGUF_FILENAME", "qwen2.5-3b-instruct-q4_k_m.gguf")  # adjust if RAM is tight
-
- TOP_K = int(os.getenv("TOP_K", "5"))
- MAX_CTX_CHARS = int(os.getenv("MAX_CTX_CHARS", "5000"))
-
- N_CTX = int(os.getenv("N_CTX", "4096"))
- MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "140"))
-
- INSUFFICIENT_EN = "Insufficient context"
- INSUFFICIENT_AR = "لا تتوفر معلومة كافية"
-
- # ===============================
- # HELPERS
- # ===============================
- AR_REGEX = re.compile(r'[\u0600-\u06FF]')
-
- def detect_lang(text: str) -> str:
-     if AR_REGEX.search(text or ""):
-         return "ar"
-     try:
-         return "ar" if detect(text or "") == "ar" else "en"
-     except:
-         return "en"
-
- def normalize_q(text: str) -> str:
-     return re.sub(r'\s+', ' ', (text or "")).strip()
-
- def make_citation(d):
-     pg = d.get("page_number", "?")
-     sec = d.get("section", d.get("tag", "")) or ""
-     return f"p.{pg}" + (f" — {sec}" if sec else "")
-
- def truncate_ctx(s: str, limit: int = MAX_CTX_CHARS) -> str:
-     return s if len(s) <= limit else s[:limit] + "\n[...]"
-
- # ===============================
- # DATA LOADING & INDEXING
- # ===============================
- def load_policy_jsons(folder: str):
-     docs = []
-     files = sorted(glob.glob(os.path.join(folder, "*.json")))
-     for fp in files:
-         try:
-             with open(fp, "r", encoding="utf-8") as f:
-                 data = json.load(f)
-
-             page_num = data.get("page_number")
-             section = data.get("doc_title", {}).get("en", "") or data.get("doc_title", {}).get("ar", "")
-             qas = data.get("qas", [])
-
-             for qa in qas:
-                 cid = qa.get("canonical_id") or qa.get("id") or os.path.basename(fp)
-                 q_ar = normalize_q(qa.get("question", {}).get("ar", ""))
-                 q_en = normalize_q(qa.get("question", {}).get("en", ""))
-                 a_ar = normalize_q(qa.get("answer", {}).get("ar", ""))
-                 a_en = normalize_q(qa.get("answer", {}).get("en", ""))
-
-                 if q_ar or a_ar:
-                     docs.append({
-                         "id": cid + "::ar",
-                         "lang": "ar",
-                         "question": q_ar,
-                         "answer": a_ar,
-                         "page_number": page_num,
-                         "section": section,
-                         "source_file": fp
-                     })
-                 if q_en or a_en:
-                     docs.append({
-                         "id": cid + "::en",
-                         "lang": "en",
-                         "question": q_en,
-                         "answer": a_en,
-                         "page_number": page_num,
-                         "section": section,
-                         "source_file": fp
-                     })
-         except Exception as e:
-             print(f"Error reading {fp}: {e}")
-
-     print(f"Loaded {len(docs)} QA passages from {len(files)} files.")
-     return docs
-
- def passages_text(d):
-     q = d.get("question") or ""
-     a = d.get("answer") or ""
-     base = f"Q: {q}\nA: {a}\nSource: page {d.get('page_number','?')}"
-     return "passage: " + base
-
- def build_index(docs, embedder, index_path, doc_store_path):
-     if not docs:
-         raise ValueError("No documents found to index.")
-     texts = [passages_text(d) for d in docs]
-     emb = embedder.encode(texts, convert_to_numpy=True, show_progress_bar=True, batch_size=64)
-     faiss.normalize_L2(emb)
-     index = faiss.IndexFlatIP(embedder.get_sentence_embedding_dimension())
-     index.add(emb)
-     faiss.write_index(index, index_path)
-     with open(doc_store_path, "wb") as f:
-         pickle.dump(docs, f)
-     print(f"Index built: {len(docs)} items.")
-
- def load_index():
-     if not (os.path.exists(INDEX_PATH) and os.path.exists(DOC_STORE_PATH)):
-         if not os.path.isdir(DATA_DIR):
-             raise FileNotFoundError(f"DATA_DIR not found: {DATA_DIR}")
-         docs = load_policy_jsons(DATA_DIR)
-         if not docs:
-             raise FileNotFoundError(f"No JSON files found in {DATA_DIR}. Please add your page JSON files.")
-         print("Building index...")
-         embedder = SentenceTransformer(EMBED_MODEL, device="cpu")
-         build_index(docs, embedder, INDEX_PATH, DOC_STORE_PATH)
-
-     index = faiss.read_index(INDEX_PATH)
-     with open(DOC_STORE_PATH, "rb") as f:
-         docs = pickle.load(f)
-     return index, docs
-
- try:
-     INDEX, DOCS = load_index()
- except Exception as e:
-     print("Failed to load/build FAISS index:", e)
-     INDEX, DOCS = None, []
-
- try:
-     EMBEDDER = SentenceTransformer(EMBED_MODEL, device="cpu")
- except Exception as e:
-     print("Failed to load embedder:", e)
-     EMBEDDER = None
-
- # ===============================
- # LLM (llama.cpp CPU) setup
- # ===============================
- def get_llm() -> Llama:
-     # Download the GGUF quantized model locally into ./models
-     local_path = hf_hub_download(
-         repo_id=GGUF_REPO_ID,
-         filename=GGUF_FILENAME,
-         local_dir="./models",
-         local_dir_use_symlinks=False
-     )
-     # Keep context moderate for free-CPU memory
-     return Llama(
-         model_path=local_path,
-         n_threads=max(2, os.cpu_count() or 2),
-         n_ctx=N_CTX,
-         chat_format="qwen",  # llama.cpp supports qwen2/qwen2.5 chat template
-         verbose=False
-     )
-
- try:
-     LLM = get_llm()
- except Exception as e:
-     print("Failed to init LLM:", e)
-     LLM = None
-
- # ===============================
- # RETRIEVAL + GENERATION
- # ===============================
- def retrieve(query_text: str, top_k: int = TOP_K, lang_hint: str = None):
-     q_emb = EMBEDDER.encode(["query: " + (query_text or "")], convert_to_numpy=True)
-     faiss.normalize_L2(q_emb)
-     D, I = INDEX.search(q_emb, top_k * 2)  # pull more, filter by language
-     lang = lang_hint or detect_lang(query_text or "")
-     same_lang, others = [], []
-     for i in I[0]:
-         if i < 0 or i >= len(DOCS):
-             continue
-         d = DOCS[i]
-         (same_lang if d.get("lang") == lang else others).append(d)
-     out = same_lang[:top_k]
-     if not out:
-         return out
-     if len(out) < top_k:
-         out.extend(others[:top_k - len(out)])
-     return out[:top_k]
-
- def build_messages(user_q: str, passages: List[Dict]):
-     lang = detect_lang(user_q or "")
-
-     sys_en = (
-         "You are NU-CS Policy Assistant. Answer ONLY using the provided context. "
-         "If the requested person/course/section is NOT present verbatim in the context, "
-         f"reply EXACTLY: \"{INSUFFICIENT_EN}\". "
-         "Include short page citations like (p.12). Answer in the user's language."
-     )
-     sys_ar = (
-         "أنت مساعد سياسات برنامج علوم الحاسب بجامعة النيل. أجب فقط من السياق المقدم. "
-         f"إذا لم يظهر الاسم/المقرر المطلوب نصًا داخل السياق فأجِب نصًا: \"{INSUFFICIENT_AR}\". "
-         "ضمّن إشارة صفحة موجزة مثل (ص.12). أجب بلغة المستخدم."
-     )
-
-     sys = sys_ar if lang == "ar" else sys_en
-
-     seen = set()
-     blocks = []
-     for d in passages:
-         key = (d.get("lang"), d.get("question"), d.get("answer"), d.get("page_number"))
-         if key in seen:
-             continue
-         seen.add(key)
-         cite = make_citation(d)
-         q = d.get("question") or ""
-         a = d.get("answer") or ""
-         if d.get("lang") == "ar":
-             blocks.append(f"س: {q}\nج: {a}\nالمصدر: {cite}")
-         else:
-             blocks.append(f"Q: {q}\nA: {a}\nSource: {cite}")
-     ctx = truncate_ctx("\n\n---\n\n".join(blocks))
-
-     if lang == "ar":
-         user = (
-             "أجب في جملة أو جملتين فقط بالاعتماد على السياق التالي. "
-             "إن لم يكن الجواب موجودًا في السياق فأجِب نصًا: \"لا تتوفر معلومة كافية\".\n\n"
-             f"السؤال: {user_q}\n\nالسياق:\n{ctx}"
-         )
-     else:
-         user = (
-             "Answer in 1–2 sentences using ONLY the context below. "
-             "If the answer isn’t in the context, reply EXACTLY: \"Insufficient context\".\n\n"
-             f"Question: {user_q}\n\nContext:\n{ctx}"
-         )
-     return [{"role": "system", "content": sys}, {"role": "user", "content": user}]
-
- def llm_generate(messages, max_new_tokens=MAX_NEW_TOKENS) -> str:
-     out = LLM.create_chat_completion(
-         messages=messages,
-         temperature=0.0,
-         max_tokens=max_new_tokens,
-         repeat_penalty=1.15,
-         stop=None,
-     )
-     try:
-         return out["choices"][0]["message"]["content"].strip()
-     except Exception:
-         return INSUFFICIENT_EN
-
- def answer_query(user_q: str, top_k: int = TOP_K):
-     if INDEX is None or EMBEDDER is None or LLM is None:
-         return INSUFFICIENT_EN, []
-
-     passages = retrieve(user_q, top_k=top_k, lang_hint=detect_lang(user_q or ""))
-     msgs = build_messages(user_q, passages)
-     resp = llm_generate(msgs, max_new_tokens=140)
-     return resp, passages
+ from rag_core import answer_query, TOP_K, INSUFFICIENT_EN

  # ===============================
  # GRADIO UI
  # ===============================
- def format_passages(passages: List[Dict]) -> str:
-     lines = []
-     for i, d in enumerate(passages, 1):
-         cite = make_citation(d)
-         q = d.get("question") or ""
-         a = d.get("answer") or ""
-         lang = d.get("lang", "")
-         lines.append(f"{i}. [{lang}] {cite}\nQ: {q}\nA: {a}\n")
-     return "\n\n".join(lines) if lines else "(no passages)"
-
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
      gr.Markdown("# NU-CS Policy RAG — Qwen 3B (CPU, GGUF via llama.cpp)")
-     gr.Markdown("Put your 51 JSON files in **./data/pages/** and (re)start the Space. It will build the FAISS index automatically.")
+     gr.Markdown(
+         "Put your page JSON files in **./data/pages/** and (re)start the Space. "
+         "It will build the FAISS index automatically."
+     )

      with gr.Row():
-         inp = gr.Textbox(label="Your question (AR/EN)", placeholder="مثال: ما هي سياسة الحضور؟ | Example: What is the attendance policy?")
-     with gr.Row():
-         topk = gr.Slider(1, 10, value=5, step=1, label="Top-K passages")
+         inp = gr.Textbox(
+             label="Your question (AR/EN)",
+             placeholder="مثال: ما هي سياسة الحضور؟ | Example: What is the attendance policy?",
+         )
      with gr.Row():
-         btn = gr.Button("Ask")
+         topk = gr.Slider(1, 10, value=TOP_K, step=1, label="Top-K passages")
      with gr.Row():
          out = gr.Textbox(label="Answer")
      with gr.Row():
@@ -303,18 +27,19 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:

      def on_ask(q, k):
          try:
-             k = int(float(k)) if k is not None else 5
+             k = int(float(k)) if k is not None else TOP_K

              if not q or not q.strip():
                  return "Please enter a question.", []

              ans, passages = answer_query(q.strip(), k)
-
              return ans, passages

          except Exception as e:
-             return f"ERROR: {e}", {"error": str(e)}
+             # full traceback goes to the debug JSON; keep the answer field user-friendly
+             return f"ERROR: {e}", {"error": traceback.format_exc()}

+     btn = gr.Button("Ask")
      btn.click(on_ask, inputs=[inp, topk], outputs=[out, ctx], api_name="answer")
      inp.submit(on_ask, inputs=[inp, topk], outputs=[out, ctx])
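
Since btn.click still registers api_name="answer", the endpoint remains callable outside the browser. A sketch using gradio_client (the Space id below is a placeholder, not taken from the commit; outputs arrive as a tuple because on_ask returns two values):

# Hypothetical API call against the running Space; replace the Space id.
from gradio_client import Client

client = Client("abdelrahman-a99/your-space-name")  # placeholder Space id
answer, passages = client.predict(
    "ما هي سياسة الحضور؟",  # question (AR/EN)
    5,                       # Top-K passages slider value
    api_name="/answer",
)
print(answer)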
rag_core.py ADDED
@@ -0,0 +1,307 @@
+ import os, json, glob, pickle, re
+ from typing import List, Dict
+ import faiss
+ from langdetect import detect
+ from sentence_transformers import SentenceTransformer
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+
+ # ===============================
+ # CONFIG
+ # ===============================
+ DATA_DIR = os.getenv("DATA_DIR", "./data/pages")
+ INDEX_PATH = os.getenv("INDEX_PATH", "./artifacts/policy.index")
+ DOC_STORE_PATH = os.getenv("DOC_STORE_PATH", "./artifacts/policy_docs.pkl")
+ ARTIFACT_DIR = os.path.dirname(INDEX_PATH) or "."
+ os.makedirs(ARTIFACT_DIR, exist_ok=True)
+
+ EMBED_MODEL = os.getenv("EMBED_MODEL", "intfloat/multilingual-e5-base")
+
+ GGUF_REPO_ID = os.getenv("GGUF_REPO_ID", "Qwen/Qwen2.5-3B-Instruct-GGUF")
+ GGUF_FILENAME = os.getenv("GGUF_FILENAME", "qwen2.5-3b-instruct-q4_k_m.gguf")
+
+ TOP_K = int(os.getenv("TOP_K", "5"))
+ MAX_CTX_CHARS = int(os.getenv("MAX_CTX_CHARS", "5000"))
+
+ N_CTX = int(os.getenv("N_CTX", "4096"))
+ MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "140"))
+
+ INSUFFICIENT_EN = "Insufficient context"
+ INSUFFICIENT_AR = "لا تتوفر معلومة كافية"
+
+ # ===============================
+ # HELPERS
+ # ===============================
+ AR_REGEX = re.compile(r'[\u0600-\u06FF]')
+
+
+ def detect_lang(text: str) -> str:
+     if AR_REGEX.search(text or ""):
+         return "ar"
+     try:
+         return "ar" if detect(text or "") == "ar" else "en"
+     except Exception:
+         return "en"
+
+
+ def normalize_q(text: str) -> str:
+     return re.sub(r"\s+", " ", (text or "")).strip()
+
+
+ def make_citation(d: Dict) -> str:
+     pg = d.get("page_number", "?")
+     sec = d.get("section", d.get("tag", "")) or ""
+     return f"p.{pg}" + (f" — {sec}" if sec else "")
+
+
+ def truncate_ctx(s: str, limit: int = MAX_CTX_CHARS) -> str:
+     return s if len(s) <= limit else s[:limit] + "\n[...]"
+
+
+ # ===============================
+ # DATA LOADING & INDEXING
+ # ===============================
+ def load_policy_jsons(folder: str):
+     docs = []
+     files = sorted(glob.glob(os.path.join(folder, "*.json")))
+     for fp in files:
+         try:
+             with open(fp, "r", encoding="utf-8") as f:
+                 data = json.load(f)
+
+             page_num = data.get("page_number")
+             section = (
+                 data.get("doc_title", {}).get("en", "")
+                 or data.get("doc_title", {}).get("ar", "")
+             )
+             qas = data.get("qas", [])
+
+             for qa in qas:
+                 cid = qa.get("canonical_id") or qa.get("id") or os.path.basename(fp)
+                 q_ar = normalize_q(qa.get("question", {}).get("ar", ""))
+                 q_en = normalize_q(qa.get("question", {}).get("en", ""))
+                 a_ar = normalize_q(qa.get("answer", {}).get("ar", ""))
+                 a_en = normalize_q(qa.get("answer", {}).get("en", ""))
+
+                 if q_ar or a_ar:
+                     docs.append(
+                         {
+                             "id": cid + "::ar",
+                             "lang": "ar",
+                             "question": q_ar,
+                             "answer": a_ar,
+                             "page_number": page_num,
+                             "section": section,
+                             "source_file": fp,
+                         }
+                     )
+                 if q_en or a_en:
+                     docs.append(
+                         {
+                             "id": cid + "::en",
+                             "lang": "en",
+                             "question": q_en,
+                             "answer": a_en,
+                             "page_number": page_num,
+                             "section": section,
+                             "source_file": fp,
+                         }
+                     )
+         except Exception as e:
+             print(f"Error reading {fp}: {e}")
+
+     print(f"Loaded {len(docs)} QA passages from {len(files)} files.")
+     return docs
+
+
+ def passages_text(d: Dict) -> str:
+     q = d.get("question") or ""
+     a = d.get("answer") or ""
+     base = f"Q: {q}\nA: {a}\nSource: page {d.get('page_number', '?')}"
+     return "passage: " + base
+
+
+ def build_index(docs, embedder, index_path, doc_store_path):
+     if not docs:
+         raise ValueError("No documents found to index.")
+     texts = [passages_text(d) for d in docs]
+     emb = embedder.encode(texts, convert_to_numpy=True, show_progress_bar=True, batch_size=64)
+     faiss.normalize_L2(emb)
+     index = faiss.IndexFlatIP(embedder.get_sentence_embedding_dimension())
+     index.add(emb)
+     faiss.write_index(index, index_path)
+     with open(doc_store_path, "wb") as f:
+         pickle.dump(docs, f)
+     print(f"Index built: {len(docs)} items.")
+
+
+ def load_index():
+     if not (os.path.exists(INDEX_PATH) and os.path.exists(DOC_STORE_PATH)):
+         if not os.path.isdir(DATA_DIR):
+             raise FileNotFoundError(f"DATA_DIR not found: {DATA_DIR}")
+         docs = load_policy_jsons(DATA_DIR)
+         if not docs:
+             raise FileNotFoundError(
+                 f"No JSON files found in {DATA_DIR}. Please add your page JSON files."
+             )
+         print("Building index...")
+         embedder = SentenceTransformer(EMBED_MODEL, device="cpu")
+         build_index(docs, embedder, INDEX_PATH, DOC_STORE_PATH)
+
+     index = faiss.read_index(INDEX_PATH)
+     with open(DOC_STORE_PATH, "rb") as f:
+         docs = pickle.load(f)
+     return index, docs
+
+
+ # Global initialization (for Spaces)
+ try:
+     INDEX, DOCS = load_index()
+ except Exception as e:
+     print("Failed to load/build FAISS index:", e)
+     INDEX, DOCS = None, []
+
+ try:
+     EMBEDDER = SentenceTransformer(EMBED_MODEL, device="cpu")
+ except Exception as e:
+     print("Failed to load embedder:", e)
+     EMBEDDER = None
+
+
+ # ===============================
+ # LLM (llama.cpp CPU) setup
+ # ===============================
+ def get_llm() -> Llama:
+     local_path = hf_hub_download(
+         repo_id=GGUF_REPO_ID,
+         filename=GGUF_FILENAME,
+         local_dir="./models",
+         local_dir_use_symlinks=False,
+     )
+     return Llama(
+         model_path=local_path,
+         n_threads=max(2, os.cpu_count() or 2),
+         n_ctx=N_CTX,
+         chat_format="qwen",
+         verbose=False,
+     )
+
+
+ try:
+     LLM = get_llm()
+ except Exception as e:
+     print("Failed to init LLM:", e)
+     LLM = None
+
+
+ # ===============================
+ # RETRIEVAL + GENERATION
+ # ===============================
+ def retrieve(query_text: str, top_k: int = TOP_K, lang_hint: str = None):
+     if EMBEDDER is None or INDEX is None:
+         return []
+
+     q_emb = EMBEDDER.encode(
+         ["query: " + (query_text or "")],
+         convert_to_numpy=True,
+     )
+     faiss.normalize_L2(q_emb)
+     D, I = INDEX.search(q_emb, top_k * 2)  # pull more, filter by language
+     lang = lang_hint or detect_lang(query_text or "")
+
+     same_lang, others = [], []
+     for i in I[0]:
+         if i < 0 or i >= len(DOCS):
+             continue
+         d = DOCS[i]
+         (same_lang if d.get("lang") == lang else others).append(d)
+
+     out = same_lang[:top_k]
+     if len(out) < top_k:
+         out.extend(others[: top_k - len(out)])
+     return out[:top_k]
+
+
+ def build_messages(user_q: str, passages: List[Dict]):
+     lang = detect_lang(user_q or "")
+
+     sys_en = (
+         "You are NU-CS Policy Assistant. Answer ONLY using the provided context. "
+         "If the requested person/course/section is NOT present verbatim in the context, "
+         f"reply EXACTLY: \"{INSUFFICIENT_EN}\". "
+         "Include short page citations like (p.12). Answer in the user's language."
+     )
+     sys_ar = (
+         "أنت مساعد سياسات برنامج علوم الحاسب بجامعة النيل. أجب فقط من السياق المقدم. "
+         f"إذا لم يظهر الاسم/المقرر المطلوب نصًا داخل السياق فأجِب نصًا: \"{INSUFFICIENT_AR}\". "
+         "ضمّن إشارة صفحة موجزة مثل (ص.12). أجب بلغة المستخدم."
+     )
+
+     sys = sys_ar if lang == "ar" else sys_en
+
+     seen = set()
+     blocks = []
+     for d in passages:
+         key = (d.get("lang"), d.get("question"), d.get("answer"), d.get("page_number"))
+         if key in seen:
+             continue
+         seen.add(key)
+         cite = make_citation(d)
+         q = d.get("question") or ""
+         a = d.get("answer") or ""
+         if d.get("lang") == "ar":
+             blocks.append(f"س: {q}\nج: {a}\nالمصدر: {cite}")
+         else:
+             blocks.append(f"Q: {q}\nA: {a}\nSource: {cite}")
+
+     ctx = truncate_ctx("\n\n---\n\n".join(blocks))
+
+     if lang == "ar":
+         user = (
+             f"أجب في جملة أو جملتين فقط بالاعتماد على السياق التالي. "
+             f"إن لم يكن الجواب موجودًا في السياق فأجِب نصًا: \"{INSUFFICIENT_AR}\".\n\n"
+             f"السؤال: {user_q}\n\nالسياق:\n{ctx}"
+         )
+     else:
+         user = (
+             f"Answer in 1–2 sentences using ONLY the context below. "
+             f"If the answer isn’t in the context, reply EXACTLY: \"{INSUFFICIENT_EN}\".\n\n"
+             f"Question: {user_q}\n\nContext:\n{ctx}"
+         )
+
+     return [{"role": "system", "content": sys}, {"role": "user", "content": user}]
+
+
+ def llm_generate(messages, max_new_tokens: int = MAX_NEW_TOKENS) -> str:
+     if LLM is None:
+         return INSUFFICIENT_EN
+     out = LLM.create_chat_completion(
+         messages=messages,
+         temperature=0.0,
+         max_tokens=max_new_tokens,
+         repeat_penalty=1.15,
+         stop=None,
+     )
+     try:
+         return out["choices"][0]["message"]["content"].strip()
+     except Exception:
+         return INSUFFICIENT_EN
+
+
+ def answer_query(user_q: str, top_k: int = TOP_K):
+     if INDEX is None or EMBEDDER is None or LLM is None:
+         lang = detect_lang(user_q or "")
+         msg = INSUFFICIENT_AR if lang == "ar" else INSUFFICIENT_EN
+         return msg, []
+
+     lang = detect_lang(user_q or "")
+     passages = retrieve(user_q, top_k=top_k, lang_hint=lang)
+
+     # If retrieval found nothing, don't waste tokens on the LLM
+     if not passages:
+         msg = INSUFFICIENT_AR if lang == "ar" else INSUFFICIENT_EN
+         return msg, []
+
+     msgs = build_messages(user_q, passages)
+     resp = llm_generate(msgs)
+     return resp, passages
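
Two conventions in rag_core.py are easy to miss: e5-family embedders expect the "query: " / "passage: " prefixes (added by passages_text and retrieve), and calling faiss.normalize_L2 before an inner-product index makes IndexFlatIP rank by cosine similarity. A self-contained toy illustration of that retrieval pattern, using the same libraries (the passage text is invented for the example):

# Toy version of the build_index()/retrieve() pattern:
# L2-normalized vectors + IndexFlatIP == cosine-similarity search.
import faiss
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("intfloat/multilingual-e5-base", device="cpu")

passages = ["passage: Q: What is the attendance policy?\nA: (example answer text)"]
emb = model.encode(passages, convert_to_numpy=True)
faiss.normalize_L2(emb)  # unit vectors, so inner product == cosine

index = faiss.IndexFlatIP(model.get_sentence_embedding_dimension())
index.add(emb)

q = model.encode(["query: attendance policy"], convert_to_numpy=True)
faiss.normalize_L2(q)
scores, ids = index.search(q, 1)
print(scores[0][0], ids[0][0])  # cosine score in [-1, 1], passage index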
requirements.txt CHANGED
@@ -2,10 +2,7 @@ faiss-cpu==1.8.0.post1
  sentence-transformers==3.0.1
  torch==2.3.1
  huggingface_hub==0.24.5
-
- # llama.cpp CPU bindings
  llama-cpp-python==0.2.90
-
  tqdm==4.66.4
  langdetect==1.0.9
  unidecode==1.3.8