Dyraa18 committed on
Commit
313a832
·
verified ·
1 Parent(s): 21c4988
Files changed (1) hide show
  1. app.py +221 -158
app.py CHANGED
@@ -1,5 +1,9 @@
1
  # app.py
2
- # Flask RAG app (HF Spaces / Static) — dataset sudah ada di Space.
 
 
 
 
3
  import os, json, re, time, logging
4
  from functools import lru_cache, wraps
5
  from typing import Dict, List, Tuple
@@ -8,7 +12,10 @@ from datetime import datetime
8
  from zoneinfo import ZoneInfo
9
  from pathlib import Path
10
 
11
- from flask import Flask, render_template, request, redirect, url_for, session, jsonify, flash
 
 
 
12
  import numpy as np
13
  import faiss
14
  import torch
@@ -20,68 +27,75 @@ load_dotenv()
20
  # ========= ENV & LOGGING =========
21
  os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
22
  os.environ.setdefault("OMP_NUM_THREADS", "1")
23
- torch.set_num_threads(1)
24
- torch.set_num_interop_threads(1)
 
 
 
 
 
25
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
26
  log = logging.getLogger("rag-app")
27
 
28
  # ========= IMPORT EKSTERNAL =========
29
- from Guardrail import validate_input # -> bool (lazy di file)
 
 
 
30
  from Model import load_model, generate # -> llama.cpp wrapper
31
 
32
  # ========= PATH ROOT PROYEK =========
33
  BASE_DIR = Path(__file__).resolve().parent
34
 
35
  # ========= KONFIGURASI RAG =========
36
- MODEL_PATH = str(BASE_DIR / "models" / os.getenv("GGUF_FILENAME", "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf"))
37
- CTX_WINDOW = 4096
38
- N_GPU_LAYERS = 0 # HF Spaces CPU only
39
- N_THREADS = int(os.environ.get("NUM_THREADS", "4"))
40
 
41
- ENCODER_NAME = "intfloat/multilingual-e5-large"
 
42
  ENCODER_DEVICE = torch.device("cpu")
43
 
44
  # Dataset sudah ada di Space → path RELATIF
45
  SUBJECTS: Dict[str, Dict[str, str]] = {
46
  "ipas": {
47
- "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Ipas" / "IPA_index.index"),
48
- "chunks": str(BASE_DIR / "Dataset" / "Ipas" / "Chunk" / "ipas_chunks.json"),
49
- "embeddings": str(BASE_DIR / "Dataset" / "Ipas" / "Embedd"/ "ipas_embeddings.npy"),
50
- "label": "IPAS",
51
- "desc": "Ilmu Pengetahuan Alam dan Sosial"
52
  },
53
  "penjas": {
54
- "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Penjas" / "PENJAS_index.index"),
55
- "chunks": str(BASE_DIR / "Dataset" / "Penjas" / "Chunk" / "penjas_chunks.json"),
56
- "embeddings": str(BASE_DIR / "Dataset" / "Penjas" / "Embedd" / "penjas_embeddings.npy"),
57
- "label": "PJOK",
58
- "desc": "Pendidikan Jasmani, Olahraga, dan Kesehatan"
59
  },
60
  "pancasila": {
61
- "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Pancasila" / "PANCASILA_index.index"),
62
- "chunks": str(BASE_DIR / "Dataset" / "Pancasila" / "Chunk" / "pancasila_chunks.json"),
63
- "embeddings": str(BASE_DIR / "Dataset" / "Pancasila" / "Embedd" / "pancasila_embeddings.npy"),
64
- "label": "PANCASILA",
65
- "desc": "Pendidikan Pancasila dan Kewarganegaraan"
66
  }
67
  }
68
 
69
  # Threshold dan fallback
70
- TOP_K_FAISS = 24
71
- TOP_K_FINAL = 10
72
- MIN_COSINE = 0.84
73
- MIN_HYBRID = 0.15
74
-
75
- FALLBACK_TEXT = "maap pengetahuan tidak ada dalam database"
76
- GUARDRAIL_BLOCK_TEXT = "maap, pertanyaan ditolak oleh guardrail"
77
- ENABLE_PROFILING = False
78
 
79
  # ========= APP =========
80
  app = Flask(__name__)
81
  app.secret_key = os.environ.get("FLASK_SECRET_KEY", "dev-secret-please-change")
82
 
83
  from werkzeug.middleware.proxy_fix import ProxyFix
84
-
85
  app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1)
86
  # supaya session tersimpan di browser saat lewat proxy/HTTPS (HF Spaces)
87
  app.config.update(
@@ -93,11 +107,10 @@ app.config.update(
93
  PREFERRED_URL_SCHEME="https",
94
  )
95
 
96
-
97
  # ========= GLOBAL MODEL =========
98
  ENCODER_TOKENIZER = None
99
- ENCODER_MODEL = None
100
- LLM = None
101
 
102
  @dataclass(frozen=True)
103
  class SubjectAssets:
@@ -112,11 +125,14 @@ STOPWORDS_ID = {
112
  "agar","sehingga","terhadap","dapat","juga","para","diri",
113
  }
114
  TOKEN_RE = re.compile(r"[A-Za-zÀ-ÖØ-öø-ÿ]+", re.UNICODE)
 
115
  def tok_id(text: str) -> List[str]:
116
  return [t.lower() for t in TOKEN_RE.findall(text or "") if t.lower() not in STOPWORDS_ID]
 
117
  def lexical_overlap(query: str, sent: str) -> float:
118
  q = set(tok_id(query)); s = set(tok_id(sent))
119
- if not q or not s: return 0.0
 
120
  return len(q & s) / max(1, len(q | s))
121
 
122
  QUESTION_LIKE_RE = re.compile(r"(^\s*(apa|mengapa|bagaimana|sebutkan|jelaskan)\b|[?]$)", re.IGNORECASE)
@@ -135,7 +151,8 @@ def clean_prefix(t: str) -> str:
135
  t = (t or "").strip()
136
  for _ in range(5):
137
  t2 = META_PREFIX_RE.sub("", t).lstrip()
138
- if t2 == t: break
 
139
  t = t2
140
  return t
141
 
@@ -143,23 +160,30 @@ def strip_meta_sentence(s: str) -> str:
143
  s = clean_prefix(s or "")
144
  if re.match(r"^\s*(berdasarkan|menurut|merujuk|mengacu|bersumber|dari)\b", s, re.IGNORECASE):
145
  s = re.sub(r"^\s*[^,.;!?]*[,.;!?]\s*", "", s) or s
146
- s = clean_prefix(s)
147
  return s.strip()
148
 
149
  SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")
 
150
  def split_sentences(text: str) -> List[str]:
151
  outs = []
152
  for p in SENT_SPLIT_RE.split(text or ""):
153
  s = clean_prefix((p or "").strip())
154
- if not s: continue
155
- if s[-1] not in ".!?": s += "."
156
- if QUESTION_LIKE_RE.search(s): continue
157
- if INSTRUCTION_RE.search(s): continue
158
- if len(s.strip()) < 10: continue
 
 
 
 
 
159
  outs.append(s)
160
  return outs
161
 
162
  # ========= MODEL WARMUP (LAZY) =========
 
163
  def warmup_models():
164
  global ENCODER_TOKENIZER, ENCODER_MODEL, LLM
165
  if ENCODER_TOKENIZER is None or ENCODER_MODEL is None:
@@ -171,30 +195,34 @@ def warmup_models():
171
  LLM = load_model(MODEL_PATH, n_ctx=CTX_WINDOW, n_gpu_layers=N_GPU_LAYERS, n_threads=N_THREADS)
172
 
173
  # ========= LOAD ASSETS PER-MAPEL =========
 
174
  @lru_cache(maxsize=8)
175
  def load_subject_assets(subject_key: str) -> SubjectAssets:
176
  if subject_key not in SUBJECTS:
177
  raise ValueError(f"Unknown subject: {subject_key}")
178
  cfg = SUBJECTS[subject_key]
179
  log.info(f"[ASSETS] Loading subject={subject_key} | index={cfg['index']}")
180
- if not os.path.exists(cfg["index"]): raise FileNotFoundError(cfg["index"])
181
- if not os.path.exists(cfg["chunks"]): raise FileNotFoundError(cfg["chunks"])
182
- if not os.path.exists(cfg["embeddings"]): raise FileNotFoundError(cfg["embeddings"])
183
-
 
 
184
  index = faiss.read_index(cfg["index"])
185
  with open(cfg["chunks"], "r", encoding="utf-8") as f:
186
  texts = [it["text"] for it in json.load(f)]
187
- embs = np.load(cfg["embeddings"])
188
  if index.ntotal != len(embs):
189
  raise RuntimeError(f"Mismatch ntotal({index.ntotal}) vs emb({len(embs)})")
190
-
191
  return SubjectAssets(index=index, texts=texts, embs=embs)
192
 
193
  # ========= ENCODER & RETRIEVAL =========
 
194
  @torch.inference_mode()
195
  def encode_query_exact(text: str) -> np.ndarray:
196
  toks = ENCODER_TOKENIZER(text, padding=True, truncation=True, return_tensors="pt").to(ENCODER_DEVICE)
197
  out = ENCODER_MODEL(**toks)
 
198
  vec = out.last_hidden_state.mean(dim=1)
199
  return vec.cpu().numpy()
200
 
@@ -228,7 +256,8 @@ def retrieve_rerank_cosine(query: str, subject_key: str) -> List[str]:
228
  return top_texts
229
 
230
  def pick_best_sentences(query: str, chunks: List[str], top_k: int = 5) -> List[str]:
231
- if not chunks: return []
 
232
  qv = encode_query_exact(query).reshape(-1)
233
  cands: List[Tuple[float, str]] = []
234
  for ch in chunks:
@@ -246,18 +275,20 @@ def pick_best_sentences(query: str, chunks: List[str], top_k: int = 5) -> List[s
246
  def build_prompt(user_query: str, sentences: List[str]) -> str:
247
  block = "\n".join(f"- {clean_prefix(s)}" for s in sentences)
248
  system = (
249
- "- Gunakan HANYA daftar kalimat fakta berikut sebagai sumber.\n"
250
- "- Jika tidak ada kalimat yang menjawab, balas: maap pengetahuan tidak ada dalam database\n"
 
251
  "- Jawab TEPAT 1 kalimat, ringkas, Bahasa Indonesia baku.\n"
252
- "- DILARANG menulis frasa meta seperti 'berdasarkan', 'menurut', 'merujuk', atau 'bersumber'."
 
 
 
 
 
 
 
 
253
  )
254
- return f"""{system}
255
- KALIMAT SUMBER:
256
- {block}
257
- PERTANYAAN:
258
- {user_query}
259
- JAWAB (1 kalimat saja):
260
- """
261
 
262
  @lru_cache(maxsize=512)
263
  def validate_input_cached(q: str) -> bool:
@@ -270,7 +301,7 @@ def validate_input_cached(q: str) -> bool:
270
  # ========= AUTH (POSTGRES) =========
271
  from werkzeug.security import generate_password_hash, check_password_hash
272
  from sqlalchemy import create_engine, Column, Integer, String, Text, Boolean, func, or_
273
- from sqlalchemy.orm import sessionmaker, scoped_session, declarative_base
274
 
275
  POSTGRES_URL = os.environ.get("POSTGRES_URL")
276
  if not POSTGRES_URL:
@@ -282,25 +313,26 @@ Base = declarative_base()
282
 
283
  class User(Base):
284
  __tablename__ = "users"
285
- id = Column(Integer, primary_key=True)
286
- username = Column(String(50), unique=True, nullable=False, index=True)
287
- email = Column(String(120), unique=True, nullable=False, index=True)
288
- password = Column(Text, nullable=False)
289
  is_active = Column(Boolean, default=True, nullable=False)
290
  is_admin = Column(Boolean, default=False, nullable=False)
291
 
292
  class ChatHistory(Base):
293
  __tablename__ = "chat_history"
294
- id = Column(Integer, primary_key=True)
295
- user_id = Column(Integer, nullable=False, index=True)
296
  subject_key = Column(String(50), nullable=False, index=True)
297
- role = Column(String(10), nullable=False)
298
- message = Column(Text, nullable=False)
299
- timestamp = Column(Integer, server_default=func.extract("epoch", func.now()))
300
 
301
  Base.metadata.create_all(bind=engine)
302
 
303
  JKT_TZ = ZoneInfo("Asia/Jakarta")
 
304
  @app.template_filter("fmt_ts")
305
  def fmt_ts(epoch_int: int):
306
  try:
@@ -331,7 +363,6 @@ def admin_required(view_func):
331
  return view_func(*args, **kwargs)
332
  return wrapper
333
 
334
-
335
  # ========= ROUTES =========
336
  @app.route("/")
337
  def root():
@@ -341,10 +372,7 @@ def root():
341
  def auth_login():
342
  if request.method == "POST":
343
  identity = (
344
- request.form.get("identity")
345
- or request.form.get("email")
346
- or request.form.get("username")
347
- or ""
348
  ).strip().lower()
349
  pw_input = (request.form.get("password") or "").strip()
350
 
@@ -356,9 +384,8 @@ def auth_login():
356
  try:
357
  user = (
358
  s.query(User)
359
- .filter(or_(func.lower(User.username) == identity,
360
- func.lower(User.email) == identity))
361
- .first()
362
  )
363
  log.info(f"[LOGIN] identity='{identity}' found={bool(user)} active={getattr(user,'is_active',None)}")
364
  ok = bool(user and user.is_active and check_password_hash(user.password, pw_input))
@@ -370,11 +397,12 @@ def auth_login():
370
  return render_template("login.html"), 401
371
 
372
  session["logged_in"] = True
373
- session["user_id"] = user.id
374
- session["username"] = user.username
375
- session["is_admin"] = bool(user.is_admin)
376
  log.info(f"[LOGIN] OK user_id={user.id}; session set.")
377
  return redirect(url_for("subjects"))
 
378
  return render_template("login.html")
379
 
380
  @app.route("/whoami")
@@ -386,7 +414,6 @@ def whoami():
386
  "is_admin": session.get("is_admin"),
387
  }
388
 
389
-
390
  @app.route("/auth/register", methods=["GET", "POST"])
391
  def auth_register():
392
  if request.method == "POST":
@@ -394,6 +421,7 @@ def auth_register():
394
  email = (request.form.get("email") or "").strip().lower()
395
  pw = (request.form.get("password") or "").strip()
396
  confirm = (request.form.get("confirm") or "").strip()
 
397
  if not username or not email or not pw:
398
  flash("Semua field wajib diisi.", "error")
399
  return render_template("register.html"), 400
@@ -403,13 +431,13 @@ def auth_register():
403
  if pw != confirm:
404
  flash("Konfirmasi password tidak cocok.", "error")
405
  return render_template("register.html"), 400
 
406
  s = db()
407
  try:
408
  existed = (
409
  s.query(User)
410
- .filter(or_(func.lower(User.username) == username,
411
- func.lower(User.email) == email))
412
- .first()
413
  )
414
  if existed:
415
  flash("Username/Email sudah terpakai.", "error")
@@ -418,8 +446,10 @@ def auth_register():
418
  s.add(u); s.commit()
419
  finally:
420
  s.close()
 
421
  flash("Registrasi berhasil. Silakan login.", "success")
422
  return redirect(url_for("auth_login"))
 
423
  return render_template("register.html")
424
 
425
  @app.route("/auth/logout")
@@ -431,14 +461,12 @@ def auth_logout():
431
  def about():
432
  return render_template("about.html")
433
 
434
-
435
  @app.route("/subjects")
436
  @login_required
437
  def subjects():
438
  log.info(f"[SESSION DEBUG] logged_in={session.get('logged_in')} user_id={session.get('user_id')}")
439
  return render_template("home.html", subjects=SUBJECTS)
440
 
441
-
442
  @app.route("/chat/<subject_key>")
443
  @login_required
444
  def chat_subject(subject_key: str):
@@ -452,9 +480,9 @@ def chat_subject(subject_key: str):
452
  uid = session.get("user_id")
453
  rows = (
454
  s.query(ChatHistory)
455
- .filter_by(user_id=uid, subject_key=subject_key)
456
- .order_by(ChatHistory.id.asc())
457
- .all()
458
  )
459
  history = [{"role": r.role, "message": r.message} for r in rows]
460
  finally:
@@ -464,7 +492,13 @@ def chat_subject(subject_key: str):
464
 
465
  @app.route("/health")
466
  def health():
467
- return jsonify({"ok": True, "encoder_loaded": ENCODER_MODEL is not None, "llm_loaded": LLM is not None})
 
 
 
 
 
 
468
 
469
  @app.route("/ask/<subject_key>", methods=["POST"])
470
  @login_required
@@ -474,13 +508,13 @@ def ask(subject_key: str):
474
 
475
  # pastikan model siap saat request (lazy)
476
  warmup_models()
477
-
478
  t0 = time.perf_counter()
479
- data = request.get_json(silent=True) or {}
480
- query = (data.get("message") or "").strip()
481
 
 
 
482
  if not query:
483
  return jsonify({"ok": False, "error": "empty query"}), 400
 
484
  if not validate_input_cached(query):
485
  return jsonify({"ok": True, "answer": GUARDRAIL_BLOCK_TEXT})
486
 
@@ -498,6 +532,7 @@ def ask(subject_key: str):
498
  chunks = retrieve_rerank_cosine(query, subject_key)
499
  if not chunks:
500
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
 
501
  sentences = pick_best_sentences(query, chunks, top_k=5)
502
  if not sentences:
503
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
@@ -506,30 +541,29 @@ def ask(subject_key: str):
506
 
507
  try:
508
  raw_answer = generate(
509
- LLM, prompt,
510
- max_tokens=64, temperature=0.2, top_p=1.0,
511
- stop=["\n\n", "\n###", "###", "\nUser:",
512
- "Berdasarkan", "berdasarkan", "Menurut", "menurut",
513
- "Merujuk", "merujuk", "Mengacu", "mengacu", "Bersumber", "bersumber"]
 
514
  )
515
  if raw_answer is None:
516
  raw_answer = ""
517
  raw_answer = raw_answer.strip()
518
-
519
- # Log raw model output for debugging
520
  log.info(f"[LLM] Raw answer repr: {repr(raw_answer)}")
521
 
522
- # Hati-hati: banyak model menyisipkan <think>...</think>
523
- # Hapus block <think>...</think> dengan aman (case-insensitive)
524
- cleaned = re.sub(r"<think\b[^>]*>.*?</think>", "", raw_answer, flags=re.DOTALL | re.IGNORECASE).strip()
525
- # Jika masih ada sisa tag tunggal seperti </think> atau <think>, hapus saja tag-tag tersebut
526
- cleaned = re.sub(r"</?think\b[^>]*>", "", cleaned, flags=re.IGNORECASE).strip()
527
 
528
- # Jika pembersihan menghapus semuanya, coba fallback: keluarkan teks yang bukan tag dari raw_answer
529
- if not cleaned:
530
- non_tag = re.sub(r"<[^>]+>", "", raw_answer).strip()
531
- if non_tag:
532
- cleaned = non_tag
 
 
533
 
534
  answer = cleaned
535
 
@@ -548,40 +582,49 @@ def ask(subject_key: str):
548
  uid = session.get("user_id")
549
  s.add_all([
550
  ChatHistory(user_id=uid, subject_key=subject_key, role="user", message=query),
551
- ChatHistory(user_id=uid, subject_key=subject_key, role="bot", message=answer)
552
  ])
553
  s.commit()
554
  except Exception as e:
555
  log.exception(f"[DB] gagal simpan chat history: {e}")
556
  finally:
557
- s.close()
 
 
 
558
 
559
  if not answer or len(answer) < 2:
560
  answer = FALLBACK_TEXT
561
 
562
  if ENABLE_PROFILING:
563
- log.info({"latency_total": time.perf_counter() - t0, "subject": subject_key, "faiss_best": best})
 
 
 
 
564
 
565
  return jsonify({"ok": True, "answer": answer})
566
 
567
  # ===== Admin views & delete actions (tetap) =====
568
- from sqlalchemy.orm import Session
569
  @app.route("/admin")
570
  @admin_required
571
  def admin_dashboard():
572
  s = db()
573
  try:
574
- total_users = s.query(func.count(User.id)).scalar() or 0
575
- total_active = s.query(func.count(User.id)).filter(User.is_active.is_(True)).scalar() or 0
576
- total_admins = s.query(func.count(User.id)).filter(User.is_admin.is_(True)).scalar() or 0
577
- total_msgs = s.query(func.count(ChatHistory.id)).scalar() or 0
578
  finally:
579
  s.close()
580
- return render_template("admin_dashboard.html",
581
- total_users=total_users,
582
- total_active=total_active,
583
- total_admins=total_admins,
584
- total_msgs=total_msgs)
 
 
585
 
586
  @app.route("/admin/users")
587
  @admin_required
@@ -589,40 +632,46 @@ def admin_users():
589
  q = (request.args.get("q") or "").strip().lower()
590
  page = max(int(request.args.get("page", 1)), 1)
591
  per_page = min(max(int(request.args.get("per_page", 20)), 5), 100)
 
592
  s = db()
593
  try:
594
  base = s.query(User)
595
  if q:
596
- base = base.filter(or_(
597
- func.lower(User.username).like(f"%{q}%"),
598
- func.lower(User.email).like(f"%{q}%")
599
- ))
 
 
600
  total = base.count()
601
- users = (base
602
- .order_by(User.id.asc())
603
- .offset((page - 1) * per_page)
604
- .limit(per_page)
605
- .all())
 
606
  user_ids = [u.id for u in users] or [-1]
607
- counts = dict(s.query(ChatHistory.user_id, func.count(ChatHistory.id))
608
- .filter(ChatHistory.user_id.in_(user_ids))
609
- .group_by(ChatHistory.user_id)
610
- .all())
 
 
611
  finally:
612
  s.close()
613
- return render_template("admin_users.html",
614
- users=users, counts=counts,
615
- q=q, page=page, per_page=per_page, total=total)
616
 
617
  @app.route("/admin/history")
618
  @admin_required
619
  def admin_history():
620
- q = (request.args.get("q") or "").strip().lower()
621
- username = (request.args.get("username") or "").strip().lower()
622
- subject = (request.args.get("subject") or "").strip().lower()
623
- role = (request.args.get("role") or "").strip().lower()
624
- page = max(int(request.args.get("page", 1)), 1)
625
- per_page = min(max(int(request.args.get("per_page", 30)), 5), 200)
 
626
 
627
  s = db()
628
  try:
@@ -630,19 +679,24 @@ def admin_history():
630
  if q:
631
  base = base.filter(func.lower(ChatHistory.message).like(f"%{q}%"))
632
  if username:
633
- base = base.filter(or_(
634
- func.lower(User.username) == username,
635
- func.lower(User.email) == username
636
- ))
 
 
637
  if subject:
638
  base = base.filter(func.lower(ChatHistory.subject_key) == subject)
639
  if role in ("user", "bot"):
640
  base = base.filter(ChatHistory.role == role)
 
641
  total = base.count()
642
- rows = (base.order_by(ChatHistory.id.desc())
 
643
  .offset((page - 1) * per_page)
644
  .limit(per_page)
645
- .all())
 
646
  finally:
647
  s.close()
648
 
@@ -656,10 +710,19 @@ def admin_history():
656
  "timestamp": r.ChatHistory.timestamp,
657
  } for r in rows]
658
 
659
- return render_template("admin_history.html",
660
- items=items, subjects=SUBJECTS,
661
- q=q, username=username, subject=subject, role=role,
662
- page=page, per_page=per_page, total=total)
 
 
 
 
 
 
 
 
 
663
 
664
  def _is_last_admin(s: Session) -> bool:
665
  return (s.query(func.count(User.id)).filter(User.is_admin.is_(True)).scalar() or 0) <= 1
 
1
  # app.py
2
+ # Flask RAG app (HF Spaces / CPU) — fixed finalization protocol for R1-style models
3
+ # - Forces model to write answer inside <final>...</final> and stops at </final>
4
+ # - Safer cleaning of <think> blocks
5
+ # - Same routes, admin pages, and Postgres auth as before
6
+
7
  import os, json, re, time, logging
8
  from functools import lru_cache, wraps
9
  from typing import Dict, List, Tuple
 
12
  from zoneinfo import ZoneInfo
13
  from pathlib import Path
14
 
15
+ from flask import (
16
+ Flask, render_template, request, redirect, url_for, session, jsonify, flash
17
+ )
18
+
19
  import numpy as np
20
  import faiss
21
  import torch
 
27
  # ========= ENV & LOGGING =========
28
  os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
29
  os.environ.setdefault("OMP_NUM_THREADS", "1")
30
+ # keep CPU footprint low in HF Spaces
31
+ try:
32
+ torch.set_num_threads(int(os.environ.get("NUM_THREADS", "4")))
33
+ torch.set_num_interop_threads(1)
34
+ except Exception:
35
+ pass
36
+
37
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
38
  log = logging.getLogger("rag-app")
39
 
40
  # ========= IMPORT EKSTERNAL =========
41
+ # Expect file Guardrail.py with validate_input(text:str)->bool
42
+ # Expect file Model.py with load_model(gguf_path, n_ctx, n_gpu_layers, n_threads) and
43
+ # generate(llm, prompt, max_tokens, temperature, top_p, stop:list[str]) -> str
44
+ from Guardrail import validate_input # -> bool (lazy in file)
45
  from Model import load_model, generate # -> llama.cpp wrapper
46
 
47
  # ========= PATH ROOT PROYEK =========
48
  BASE_DIR = Path(__file__).resolve().parent
49
 
50
  # ========= KONFIGURASI RAG =========
51
+ MODEL_PATH = str(BASE_DIR / "models" / os.getenv("GGUF_FILENAME", "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf"))
52
+ CTX_WINDOW = int(os.environ.get("CTX_WINDOW", 2048)) # 2048 cukup untuk RAG singkat
53
+ N_GPU_LAYERS = int(os.environ.get("N_GPU_LAYERS", 0)) # HF Spaces CPU only
54
+ N_THREADS = int(os.environ.get("NUM_THREADS", 4))
55
 
56
+ # ganti ke encoder lain jika perlu (m-e5-large cukup bagus untuk multilingual)
57
+ ENCODER_NAME = os.environ.get("ENCODER_NAME", "intfloat/multilingual-e5-large")
58
  ENCODER_DEVICE = torch.device("cpu")
59
 
60
  # Dataset sudah ada di Space → path RELATIF
61
  SUBJECTS: Dict[str, Dict[str, str]] = {
62
  "ipas": {
63
+ "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Ipas" / "IPA_index.index"),
64
+ "chunks": str(BASE_DIR / "Dataset" / "Ipas" / "Chunk" / "ipas_chunks.json"),
65
+ "embeddings": str(BASE_DIR / "Dataset" / "Ipas" / "Embedd"/ "ipas_embeddings.npy"),
66
+ "label": "IPAS",
67
+ "desc": "Ilmu Pengetahuan Alam dan Sosial"
68
  },
69
  "penjas": {
70
+ "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Penjas" / "PENJAS_index.index"),
71
+ "chunks": str(BASE_DIR / "Dataset" / "Penjas" / "Chunk" / "penjas_chunks.json"),
72
+ "embeddings": str(BASE_DIR / "Dataset" / "Penjas" / "Embedd" / "penjas_embeddings.npy"),
73
+ "label": "PJOK",
74
+ "desc": "Pendidikan Jasmani, Olahraga, dan Kesehatan"
75
  },
76
  "pancasila": {
77
+ "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Pancasila" / "PANCASILA_index.index"),
78
+ "chunks": str(BASE_DIR / "Dataset" / "Pancasila" / "Chunk" / "pancasila_chunks.json"),
79
+ "embeddings": str(BASE_DIR / "Dataset" / "Pancasila" / "Embedd" / "pancasila_embeddings.npy"),
80
+ "label": "PANCASILA",
81
+ "desc": "Pendidikan Pancasila dan Kewarganegaraan"
82
  }
83
  }
84
 
85
  # Threshold dan fallback
86
+ TOP_K_FAISS = int(os.environ.get("TOP_K_FAISS", 24))
87
+ TOP_K_FINAL = int(os.environ.get("TOP_K_FINAL", 10))
88
+ MIN_COSINE = float(os.environ.get("MIN_COSINE", 0.84))
89
+ MIN_HYBRID = float(os.environ.get("MIN_HYBRID", 0.15))
90
+ FALLBACK_TEXT = os.environ.get("FALLBACK_TEXT", "maap pengetahuan tidak ada dalam database")
91
+ GUARDRAIL_BLOCK_TEXT = os.environ.get("GUARDRAIL_BLOCK_TEXT", "maap, pertanyaan ditolak oleh guardrail")
92
+ ENABLE_PROFILING = os.environ.get("ENABLE_PROFILING", "false").lower() == "true"
 
93
 
94
  # ========= APP =========
95
  app = Flask(__name__)
96
  app.secret_key = os.environ.get("FLASK_SECRET_KEY", "dev-secret-please-change")
97
 
98
  from werkzeug.middleware.proxy_fix import ProxyFix
 
99
  app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1)
100
  # supaya session tersimpan di browser saat lewat proxy/HTTPS (HF Spaces)
101
  app.config.update(
 
107
  PREFERRED_URL_SCHEME="https",
108
  )
109
 
 
110
  # ========= GLOBAL MODEL =========
111
  ENCODER_TOKENIZER = None
112
+ ENCODER_MODEL = None
113
+ LLM = None
114
 
115
  @dataclass(frozen=True)
116
  class SubjectAssets:
 
125
  "agar","sehingga","terhadap","dapat","juga","para","diri",
126
  }
127
  TOKEN_RE = re.compile(r"[A-Za-zÀ-ÖØ-öø-ÿ]+", re.UNICODE)
128
+
129
  def tok_id(text: str) -> List[str]:
130
  return [t.lower() for t in TOKEN_RE.findall(text or "") if t.lower() not in STOPWORDS_ID]
131
+
132
  def lexical_overlap(query: str, sent: str) -> float:
133
  q = set(tok_id(query)); s = set(tok_id(sent))
134
+ if not q or not s:
135
+ return 0.0
136
  return len(q & s) / max(1, len(q | s))
137
 
138
  QUESTION_LIKE_RE = re.compile(r"(^\s*(apa|mengapa|bagaimana|sebutkan|jelaskan)\b|[?]$)", re.IGNORECASE)
 
151
  t = (t or "").strip()
152
  for _ in range(5):
153
  t2 = META_PREFIX_RE.sub("", t).lstrip()
154
+ if t2 == t:
155
+ break
156
  t = t2
157
  return t
158
 
 
160
  s = clean_prefix(s or "")
161
  if re.match(r"^\s*(berdasarkan|menurut|merujuk|mengacu|bersumber|dari)\b", s, re.IGNORECASE):
162
  s = re.sub(r"^\s*[^,.;!?]*[,.;!?]\s*", "", s) or s
163
+ s = clean_prefix(s)
164
  return s.strip()
165
 
166
  SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")
167
+
168
  def split_sentences(text: str) -> List[str]:
169
  outs = []
170
  for p in SENT_SPLIT_RE.split(text or ""):
171
  s = clean_prefix((p or "").strip())
172
+ if not s:
173
+ continue
174
+ if s[-1] not in ".!?":
175
+ s += "."
176
+ if QUESTION_LIKE_RE.search(s):
177
+ continue
178
+ if INSTRUCTION_RE.search(s):
179
+ continue
180
+ if len(s.strip()) < 10:
181
+ continue
182
  outs.append(s)
183
  return outs
184
 
185
  # ========= MODEL WARMUP (LAZY) =========
186
+
187
  def warmup_models():
188
  global ENCODER_TOKENIZER, ENCODER_MODEL, LLM
189
  if ENCODER_TOKENIZER is None or ENCODER_MODEL is None:
 
195
  LLM = load_model(MODEL_PATH, n_ctx=CTX_WINDOW, n_gpu_layers=N_GPU_LAYERS, n_threads=N_THREADS)
196
 
197
  # ========= LOAD ASSETS PER-MAPEL =========
198
+
199
  @lru_cache(maxsize=8)
200
  def load_subject_assets(subject_key: str) -> SubjectAssets:
201
  if subject_key not in SUBJECTS:
202
  raise ValueError(f"Unknown subject: {subject_key}")
203
  cfg = SUBJECTS[subject_key]
204
  log.info(f"[ASSETS] Loading subject={subject_key} | index={cfg['index']}")
205
+ if not os.path.exists(cfg["index"]):
206
+ raise FileNotFoundError(cfg["index"])
207
+ if not os.path.exists(cfg["chunks"]):
208
+ raise FileNotFoundError(cfg["chunks"])
209
+ if not os.path.exists(cfg["embeddings"]):
210
+ raise FileNotFoundError(cfg["embeddings"])
211
  index = faiss.read_index(cfg["index"])
212
  with open(cfg["chunks"], "r", encoding="utf-8") as f:
213
  texts = [it["text"] for it in json.load(f)]
214
+ embs = np.load(cfg["embeddings"]) # shape: (N, dim)
215
  if index.ntotal != len(embs):
216
  raise RuntimeError(f"Mismatch ntotal({index.ntotal}) vs emb({len(embs)})")
 
217
  return SubjectAssets(index=index, texts=texts, embs=embs)
218
 
219
  # ========= ENCODER & RETRIEVAL =========
220
+
221
  @torch.inference_mode()
222
  def encode_query_exact(text: str) -> np.ndarray:
223
  toks = ENCODER_TOKENIZER(text, padding=True, truncation=True, return_tensors="pt").to(ENCODER_DEVICE)
224
  out = ENCODER_MODEL(**toks)
225
+ # simple mean pooling (CLS-less encoders)
226
  vec = out.last_hidden_state.mean(dim=1)
227
  return vec.cpu().numpy()
228
 
 
256
  return top_texts
257
 
258
  def pick_best_sentences(query: str, chunks: List[str], top_k: int = 5) -> List[str]:
259
+ if not chunks:
260
+ return []
261
  qv = encode_query_exact(query).reshape(-1)
262
  cands: List[Tuple[float, str]] = []
263
  for ch in chunks:
 
275
  def build_prompt(user_query: str, sentences: List[str]) -> str:
276
  block = "\n".join(f"- {clean_prefix(s)}" for s in sentences)
277
  system = (
278
+ "Kamu asisten RAG.\n"
279
+ "- Jawab HANYA berdasarkan daftar kalimat fakta di bawah.\n"
280
+ f"- Jika tidak ada kalimat yang relevan, tulis persis: {FALLBACK_TEXT}\n"
281
  "- Jawab TEPAT 1 kalimat, ringkas, Bahasa Indonesia baku.\n"
282
+ "- DILARANG menulis frasa meta seperti 'berdasarkan', 'menurut', 'merujuk', 'mengacu', atau 'bersumber'.\n"
283
+ "- Tulis jawaban final di dalam tag <final>... seperti: <final>Jawaban satu kalimat.</final>\n"
284
+ "- Jangan menulis apa pun setelah </final>."
285
+ )
286
+ return (
287
+ f"{system}\n\n"
288
+ f"KALIMAT SUMBER:\n{block}\n\n"
289
+ f"PERTANYAAN: {user_query}\n"
290
+ f"TULIS JAWABAN DI DALAM <final>...</final> SAJA:"
291
  )
 
 
 
 
 
 
 
292
 
293
  @lru_cache(maxsize=512)
294
  def validate_input_cached(q: str) -> bool:
 
301
  # ========= AUTH (POSTGRES) =========
302
  from werkzeug.security import generate_password_hash, check_password_hash
303
  from sqlalchemy import create_engine, Column, Integer, String, Text, Boolean, func, or_
304
+ from sqlalchemy.orm import sessionmaker, scoped_session, declarative_base, Session
305
 
306
  POSTGRES_URL = os.environ.get("POSTGRES_URL")
307
  if not POSTGRES_URL:
 
313
 
314
  class User(Base):
315
  __tablename__ = "users"
316
+ id = Column(Integer, primary_key=True)
317
+ username = Column(String(50), unique=True, nullable=False, index=True)
318
+ email = Column(String(120), unique=True, nullable=False, index=True)
319
+ password = Column(Text, nullable=False)
320
  is_active = Column(Boolean, default=True, nullable=False)
321
  is_admin = Column(Boolean, default=False, nullable=False)
322
 
323
  class ChatHistory(Base):
324
  __tablename__ = "chat_history"
325
+ id = Column(Integer, primary_key=True)
326
+ user_id = Column(Integer, nullable=False, index=True)
327
  subject_key = Column(String(50), nullable=False, index=True)
328
+ role = Column(String(10), nullable=False)
329
+ message = Column(Text, nullable=False)
330
+ timestamp = Column(Integer, server_default=func.extract("epoch", func.now()))
331
 
332
  Base.metadata.create_all(bind=engine)
333
 
334
  JKT_TZ = ZoneInfo("Asia/Jakarta")
335
+
336
  @app.template_filter("fmt_ts")
337
  def fmt_ts(epoch_int: int):
338
  try:
 
363
  return view_func(*args, **kwargs)
364
  return wrapper
365
 
 
366
  # ========= ROUTES =========
367
  @app.route("/")
368
  def root():
 
372
  def auth_login():
373
  if request.method == "POST":
374
  identity = (
375
+ request.form.get("identity") or request.form.get("email") or request.form.get("username") or ""
 
 
 
376
  ).strip().lower()
377
  pw_input = (request.form.get("password") or "").strip()
378
 
 
384
  try:
385
  user = (
386
  s.query(User)
387
+ .filter(or_(func.lower(User.username) == identity, func.lower(User.email) == identity))
388
+ .first()
 
389
  )
390
  log.info(f"[LOGIN] identity='{identity}' found={bool(user)} active={getattr(user,'is_active',None)}")
391
  ok = bool(user and user.is_active and check_password_hash(user.password, pw_input))
 
397
  return render_template("login.html"), 401
398
 
399
  session["logged_in"] = True
400
+ session["user_id"] = user.id
401
+ session["username"] = user.username
402
+ session["is_admin"] = bool(user.is_admin)
403
  log.info(f"[LOGIN] OK user_id={user.id}; session set.")
404
  return redirect(url_for("subjects"))
405
+
406
  return render_template("login.html")
407
 
408
  @app.route("/whoami")
 
414
  "is_admin": session.get("is_admin"),
415
  }
416
 
 
417
  @app.route("/auth/register", methods=["GET", "POST"])
418
  def auth_register():
419
  if request.method == "POST":
 
421
  email = (request.form.get("email") or "").strip().lower()
422
  pw = (request.form.get("password") or "").strip()
423
  confirm = (request.form.get("confirm") or "").strip()
424
+
425
  if not username or not email or not pw:
426
  flash("Semua field wajib diisi.", "error")
427
  return render_template("register.html"), 400
 
431
  if pw != confirm:
432
  flash("Konfirmasi password tidak cocok.", "error")
433
  return render_template("register.html"), 400
434
+
435
  s = db()
436
  try:
437
  existed = (
438
  s.query(User)
439
+ .filter(or_(func.lower(User.username) == username, func.lower(User.email) == email))
440
+ .first()
 
441
  )
442
  if existed:
443
  flash("Username/Email sudah terpakai.", "error")
 
446
  s.add(u); s.commit()
447
  finally:
448
  s.close()
449
+
450
  flash("Registrasi berhasil. Silakan login.", "success")
451
  return redirect(url_for("auth_login"))
452
+
453
  return render_template("register.html")
454
 
455
  @app.route("/auth/logout")
 
461
  def about():
462
  return render_template("about.html")
463
 
 
464
  @app.route("/subjects")
465
  @login_required
466
  def subjects():
467
  log.info(f"[SESSION DEBUG] logged_in={session.get('logged_in')} user_id={session.get('user_id')}")
468
  return render_template("home.html", subjects=SUBJECTS)
469
 
 
470
  @app.route("/chat/<subject_key>")
471
  @login_required
472
  def chat_subject(subject_key: str):
 
480
  uid = session.get("user_id")
481
  rows = (
482
  s.query(ChatHistory)
483
+ .filter_by(user_id=uid, subject_key=subject_key)
484
+ .order_by(ChatHistory.id.asc())
485
+ .all()
486
  )
487
  history = [{"role": r.role, "message": r.message} for r in rows]
488
  finally:
 
492
 
493
  @app.route("/health")
494
  def health():
495
+ return jsonify({
496
+ "ok": True,
497
+ "encoder_loaded": ENCODER_MODEL is not None,
498
+ "llm_loaded": LLM is not None,
499
+ "model_path": MODEL_PATH,
500
+ "ctx_window": CTX_WINDOW,
501
+ })
502
 
503
  @app.route("/ask/<subject_key>", methods=["POST"])
504
  @login_required
 
508
 
509
  # pastikan model siap saat request (lazy)
510
  warmup_models()
 
511
  t0 = time.perf_counter()
 
 
512
 
513
+ data = request.get_json(silent=True) or {}
514
+ query = (data.get("message") or "").strip()
515
  if not query:
516
  return jsonify({"ok": False, "error": "empty query"}), 400
517
+
518
  if not validate_input_cached(query):
519
  return jsonify({"ok": True, "answer": GUARDRAIL_BLOCK_TEXT})
520
 
 
532
  chunks = retrieve_rerank_cosine(query, subject_key)
533
  if not chunks:
534
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
535
+
536
  sentences = pick_best_sentences(query, chunks, top_k=5)
537
  if not sentences:
538
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
 
541
 
542
  try:
543
  raw_answer = generate(
544
+ LLM,
545
+ prompt,
546
+ max_tokens=int(os.environ.get("MAX_TOKENS", 64)),
547
+ temperature=float(os.environ.get("TEMP", 0.0)), # deterministik
548
+ top_p=1.0,
549
+ stop=["</final>"] # hentikan persis saat final ditutup
550
  )
551
  if raw_answer is None:
552
  raw_answer = ""
553
  raw_answer = raw_answer.strip()
 
 
554
  log.info(f"[LLM] Raw answer repr: {repr(raw_answer)}")
555
 
556
+ # 1) Buang blok <think>...</think> kalau ada
557
+ text = re.sub(r"<think\b[^>]*>.*?</think>", "", raw_answer, flags=re.DOTALL | re.IGNORECASE).strip()
558
+ text = re.sub(r"</?think\b[^>]*>", "", text, flags=re.IGNORECASE).strip()
 
 
559
 
560
+ # 2) Ambil isi setelah <final> (kita stop di </final>, jadi biasanya tidak ada penutup di output)
561
+ m_final = re.search(r"<final>\s*(.+)$", text, flags=re.IGNORECASE | re.DOTALL)
562
+ if m_final:
563
+ cleaned = m_final.group(1).strip()
564
+ else:
565
+ # fallback: kalau model tidak menulis <final>, pakai sisa teks non-tag
566
+ cleaned = re.sub(r"<[^>]+>", "", text).strip()
567
 
568
  answer = cleaned
569
 
 
582
  uid = session.get("user_id")
583
  s.add_all([
584
  ChatHistory(user_id=uid, subject_key=subject_key, role="user", message=query),
585
+ ChatHistory(user_id=uid, subject_key=subject_key, role="bot", message=answer),
586
  ])
587
  s.commit()
588
  except Exception as e:
589
  log.exception(f"[DB] gagal simpan chat history: {e}")
590
  finally:
591
+ try:
592
+ s.close()
593
+ except Exception:
594
+ pass
595
 
596
  if not answer or len(answer) < 2:
597
  answer = FALLBACK_TEXT
598
 
599
  if ENABLE_PROFILING:
600
+ log.info({
601
+ "latency_total": time.perf_counter() - t0,
602
+ "subject": subject_key,
603
+ "faiss_best": best,
604
+ })
605
 
606
  return jsonify({"ok": True, "answer": answer})
607
 
608
  # ===== Admin views & delete actions (unchanged) =====
609
+
610
  @app.route("/admin")
611
  @admin_required
612
  def admin_dashboard():
613
  s = db()
614
  try:
615
+ total_users = s.query(func.count(User.id)).scalar() or 0
616
+ total_active = s.query(func.count(User.id)).filter(User.is_active.is_(True)).scalar() or 0
617
+ total_admins = s.query(func.count(User.id)).filter(User.is_admin.is_(True)).scalar() or 0
618
+ total_msgs = s.query(func.count(ChatHistory.id)).scalar() or 0
619
  finally:
620
  s.close()
621
+ return render_template(
622
+ "admin_dashboard.html",
623
+ total_users=total_users,
624
+ total_active=total_active,
625
+ total_admins=total_admins,
626
+ total_msgs=total_msgs,
627
+ )
628
 
629
  @app.route("/admin/users")
630
  @admin_required
 
632
  q = (request.args.get("q") or "").strip().lower()
633
  page = max(int(request.args.get("page", 1)), 1)
634
  per_page = min(max(int(request.args.get("per_page", 20)), 5), 100)
635
+
636
  s = db()
637
  try:
638
  base = s.query(User)
639
  if q:
640
+ base = base.filter(
641
+ or_(
642
+ func.lower(User.username).like(f"%{q}%"),
643
+ func.lower(User.email).like(f"%{q}%"),
644
+ )
645
+ )
646
  total = base.count()
647
+ users = (
648
+ base.order_by(User.id.asc())
649
+ .offset((page - 1) * per_page)
650
+ .limit(per_page)
651
+ .all()
652
+ )
653
  user_ids = [u.id for u in users] or [-1]
654
+ counts = dict(
655
+ s.query(ChatHistory.user_id, func.count(ChatHistory.id))
656
+ .filter(ChatHistory.user_id.in_(user_ids))
657
+ .group_by(ChatHistory.user_id)
658
+ .all()
659
+ )
660
  finally:
661
  s.close()
662
+
663
+ return render_template("admin_users.html", users=users, counts=counts, q=q, page=page, per_page=per_page, total=total)
 
664
 
665
  @app.route("/admin/history")
666
  @admin_required
667
  def admin_history():
668
+ q = (request.args.get("q") or "").strip().lower()
669
+ username = (request.args.get("username") or "").strip().lower()
670
+ subject = (request.args.get("subject") or "").strip().lower()
671
+ role = (request.args.get("role") or "").strip().lower()
672
+
673
+ page = max(int(request.args.get("page", 1)), 1)
674
+ per_page = min(max(int(request.args.get("per_page", 30)), 5), 200)
675
 
676
  s = db()
677
  try:
 
679
  if q:
680
  base = base.filter(func.lower(ChatHistory.message).like(f"%{q}%"))
681
  if username:
682
+ base = base.filter(
683
+ or_(
684
+ func.lower(User.username) == username,
685
+ func.lower(User.email) == username,
686
+ )
687
+ )
688
  if subject:
689
  base = base.filter(func.lower(ChatHistory.subject_key) == subject)
690
  if role in ("user", "bot"):
691
  base = base.filter(ChatHistory.role == role)
692
+
693
  total = base.count()
694
+ rows = (
695
+ base.order_by(ChatHistory.id.desc())
696
  .offset((page - 1) * per_page)
697
  .limit(per_page)
698
+ .all()
699
+ )
700
  finally:
701
  s.close()
702
 
 
710
  "timestamp": r.ChatHistory.timestamp,
711
  } for r in rows]
712
 
713
+ return render_template(
714
+ "admin_history.html",
715
+ items=items,
716
+ subjects=SUBJECTS,
717
+ q=q,
718
+ username=username,
719
+ subject=subject,
720
+ role=role,
721
+ page=page,
722
+ per_page=per_page,
723
+ total=total,
724
+ )
725
+
726
 
727
  def _is_last_admin(s: Session) -> bool:
728
  return (s.query(func.count(User.id)).filter(User.is_admin.is_(True)).scalar() or 0) <= 1