Dyraa18 committed on
Commit
187181f
·
verified ·
1 Parent(s): 2a361d1
Files changed (1) hide show
  1. app.py +252 -211
app.py CHANGED
@@ -1,14 +1,23 @@
1
- # app.py
2
- # Flask RAG app (HF Spaces / Static) — dataset sudah ada di Space.
 
 
 
 
 
 
3
  import os, json, re, time, logging
4
- from functools import lru_cache
5
  from typing import Dict, List, Tuple
6
  from dataclasses import dataclass
7
  from datetime import datetime
8
  from zoneinfo import ZoneInfo
9
  from pathlib import Path
10
 
11
- from flask import Flask, render_template, request, redirect, url_for, session, jsonify, flash
 
 
 
12
  import numpy as np
13
  import faiss
14
  import torch
@@ -20,70 +29,72 @@ load_dotenv()
20
  # ========= ENV & LOGGING =========
21
  os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
22
  os.environ.setdefault("OMP_NUM_THREADS", "1")
23
- torch.set_num_threads(1)
24
- torch.set_num_interop_threads(1)
 
 
 
 
25
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
26
  log = logging.getLogger("rag-app")
27
 
28
- # ========= IMPORT EKSTERNAL =========
29
- from Guardrail import validate_input # -> bool (lazy di file)
30
  from Model import load_model, generate # -> llama.cpp wrapper
31
 
32
- # ========= PATH ROOT PROYEK =========
33
  BASE_DIR = Path(__file__).resolve().parent
34
 
35
- # ========= KONFIGURASI RAG =========
36
- MODEL_PATH = str(BASE_DIR / "models" / os.getenv("GGUF_FILENAME", "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf"))
37
- CTX_WINDOW = 4096
38
- N_GPU_LAYERS = 0 # HF Spaces CPU only
39
- N_THREADS = int(os.environ.get("NUM_THREADS", "4"))
 
40
 
41
- ENCODER_NAME = "intfloat/multilingual-e5-large"
42
  ENCODER_DEVICE = torch.device("cpu")
43
 
44
- # Dataset sudah ada di Space → path RELATIF
45
  SUBJECTS: Dict[str, Dict[str, str]] = {
46
  "ipas": {
47
- "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Ipas" / "IPA_index.index"),
48
- "chunks": str(BASE_DIR / "Dataset" / "Ipas" / "Chunk" / "ipas_chunks.json"),
49
- "embeddings": str(BASE_DIR / "Dataset" / "Ipas" / "Embedd"/ "ipas_embeddings.npy"),
50
- "label": "IPAS",
51
- "desc": "Ilmu Pengetahuan Alam dan Sosial"
52
  },
53
  "penjas": {
54
- "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Penjas" / "PENJAS_index.index"),
55
- "chunks": str(BASE_DIR / "Dataset" / "Penjas" / "Chunk" / "penjas_chunks.json"),
56
- "embeddings": str(BASE_DIR / "Dataset" / "Penjas" / "Embedd" / "penjas_embeddings.npy"),
57
- "label": "PJOK",
58
- "desc": "Pendidikan Jasmani, Olahraga, dan Kesehatan"
59
  },
60
  "pancasila": {
61
- "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Pancasila" / "PANCASILA_index.index"),
62
- "chunks": str(BASE_DIR / "Dataset" / "Pancasila" / "Chunk" / "pancasila_chunks.json"),
63
- "embeddings": str(BASE_DIR / "Dataset" / "Pancasila" / "Embedd" / "pancasila_embeddings.npy"),
64
- "label": "PANCASILA",
65
- "desc": "Pendidikan Pancasila dan Kewarganegaraan"
66
  }
67
  }
68
 
69
- # Threshold dan fallback
70
- TOP_K_FAISS = 24
71
- TOP_K_FINAL = 10
72
- MIN_COSINE = 0.84
73
- MIN_HYBRID = 0.15
74
-
75
- FALLBACK_TEXT = "maap pengetahuan tidak ada dalam database"
76
- GUARDRAIL_BLOCK_TEXT = "maap, pertanyaan ditolak oleh guardrail"
77
- ENABLE_PROFILING = False
78
 
79
  # ========= APP =========
80
  app = Flask(__name__)
81
  app.secret_key = os.environ.get("FLASK_SECRET_KEY", "dev-secret-please-change")
82
 
83
  from werkzeug.middleware.proxy_fix import ProxyFix
84
-
85
  app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1)
86
- # supaya session tersimpan di browser saat lewat proxy/HTTPS (HF Spaces)
87
  app.config.update(
88
  SESSION_COOKIE_NAME="session",
89
  SESSION_COOKIE_SAMESITE="None",
@@ -93,11 +104,10 @@ app.config.update(
93
  PREFERRED_URL_SCHEME="https",
94
  )
95
 
96
-
97
- # ========= GLOBAL MODEL =========
98
  ENCODER_TOKENIZER = None
99
- ENCODER_MODEL = None
100
- LLM = None
101
 
102
  @dataclass(frozen=True)
103
  class SubjectAssets:
@@ -105,18 +115,25 @@ class SubjectAssets:
105
  texts: List[str]
106
  embs: np.ndarray
107
 
108
- # ========= TEKS UTILITAS =========
109
  STOPWORDS_ID = {
110
  "yang","dan","atau","pada","di","ke","dari","itu","ini","adalah","dengan",
111
  "untuk","serta","sebagai","oleh","dalam","akan","kamu","apa","karena",
112
  "agar","sehingga","terhadap","dapat","juga","para","diri",
113
  }
114
  TOKEN_RE = re.compile(r"[A-Za-zÀ-ÖØ-öø-ÿ]+", re.UNICODE)
 
 
 
 
 
115
  def tok_id(text: str) -> List[str]:
116
- return [t.lower() for t in TOKEN_RE.findall(text or "") if t.lower() not in STOPWORDS_ID]
 
117
  def lexical_overlap(query: str, sent: str) -> float:
118
  q = set(tok_id(query)); s = set(tok_id(sent))
119
- if not q or not s: return 0.0
 
120
  return len(q & s) / max(1, len(q | s))
121
 
122
  QUESTION_LIKE_RE = re.compile(r"(^\s*(apa|mengapa|bagaimana|sebutkan|jelaskan)\b|[?]$)", re.IGNORECASE)
@@ -133,9 +150,10 @@ META_PREFIX_RE = re.compile(r"^\s*(?:" + r"|".join(META_PREFIX_PATTERNS) + r")\s
133
 
134
  def clean_prefix(t: str) -> str:
135
  t = (t or "").strip()
136
- for _ in range(5):
137
  t2 = META_PREFIX_RE.sub("", t).lstrip()
138
- if t2 == t: break
 
139
  t = t2
140
  return t
141
 
@@ -143,23 +161,31 @@ def strip_meta_sentence(s: str) -> str:
143
  s = clean_prefix(s or "")
144
  if re.match(r"^\s*(berdasarkan|menurut|merujuk|mengacu|bersumber|dari)\b", s, re.IGNORECASE):
145
  s = re.sub(r"^\s*[^,.;!?]*[,.;!?]\s*", "", s) or s
146
- s = clean_prefix(s)
147
  return s.strip()
148
 
149
  SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")
150
- def split_sentences(text: str) -> List[str]:
 
 
151
  outs = []
152
  for p in SENT_SPLIT_RE.split(text or ""):
153
  s = clean_prefix((p or "").strip())
154
- if not s: continue
155
- if s[-1] not in ".!?": s += "."
156
- if QUESTION_LIKE_RE.search(s): continue
157
- if INSTRUCTION_RE.search(s): continue
158
- if len(s.strip()) < 10: continue
 
 
 
 
 
159
  outs.append(s)
160
  return outs
161
 
162
- # ========= MODEL WARMUP (LAZY) =========
 
163
  def warmup_models():
164
  global ENCODER_TOKENIZER, ENCODER_MODEL, LLM
165
  if ENCODER_TOKENIZER is None or ENCODER_MODEL is None:
@@ -167,31 +193,35 @@ def warmup_models():
167
  ENCODER_TOKENIZER = AutoTokenizer.from_pretrained(ENCODER_NAME)
168
  ENCODER_MODEL = AutoModel.from_pretrained(ENCODER_NAME).to(ENCODER_DEVICE).eval()
169
  if LLM is None:
170
- log.info(f"[INIT] Load LLM: {MODEL_PATH}")
171
  LLM = load_model(MODEL_PATH, n_ctx=CTX_WINDOW, n_gpu_layers=N_GPU_LAYERS, n_threads=N_THREADS)
172
 
173
- # ========= LOAD ASSETS PER-MAPEL =========
 
174
  @lru_cache(maxsize=8)
175
- def load_subject_assets(subject_key: str) -> SubjectAssets:
176
  if subject_key not in SUBJECTS:
177
  raise ValueError(f"Unknown subject: {subject_key}")
178
  cfg = SUBJECTS[subject_key]
179
  log.info(f"[ASSETS] Loading subject={subject_key} | index={cfg['index']}")
180
- if not os.path.exists(cfg["index"]): raise FileNotFoundError(cfg["index"])
181
- if not os.path.exists(cfg["chunks"]): raise FileNotFoundError(cfg["chunks"])
182
- if not os.path.exists(cfg["embeddings"]): raise FileNotFoundError(cfg["embeddings"])
183
-
 
 
184
  index = faiss.read_index(cfg["index"])
185
  with open(cfg["chunks"], "r", encoding="utf-8") as f:
186
- texts = [it["text"] for it in json.load(f)]
187
- embs = np.load(cfg["embeddings"])
188
  if index.ntotal != len(embs):
189
  raise RuntimeError(f"Mismatch ntotal({index.ntotal}) vs emb({len(embs)})")
190
-
191
  return SubjectAssets(index=index, texts=texts, embs=embs)
192
 
193
- # ========= ENCODER & RETRIEVAL =========
 
194
  @torch.inference_mode()
 
195
  def encode_query_exact(text: str) -> np.ndarray:
196
  toks = ENCODER_TOKENIZER(text, padding=True, truncation=True, return_tensors="pt").to(ENCODER_DEVICE)
197
  out = ENCODER_MODEL(**toks)
@@ -200,7 +230,10 @@ def encode_query_exact(text: str) -> np.ndarray:
200
 
201
  def cosine_sim(a: np.ndarray, b: np.ndarray) -> float:
202
  a = np.asarray(a).reshape(-1); b = np.asarray(b).reshape(-1)
203
- return float(np.dot(a, b) / ((np.linalg.norm(a) * np.linalg.norm(b)) + 1e-12))
 
 
 
204
 
205
  def best_cosine_from_faiss(query: str, subject_key: str) -> float:
206
  assets = load_subject_assets(subject_key)
@@ -213,53 +246,54 @@ def best_cosine_from_faiss(query: str, subject_key: str) -> float:
213
  best = max(best, cosine_sim(qv, assets.embs[i]))
214
  return best
215
 
216
- def retrieve_rerank_cosine(query: str, subject_key: str) -> List[str]:
217
  assets = load_subject_assets(subject_key)
218
  q = encode_query_exact(query)
219
- D, idx = assets.index.search(q, TOP_K_FAISS)
220
  idxs = [i for i in idx[0] if 0 <= i < len(assets.texts)]
221
- if not idxs:
222
- return []
223
- qv = q.reshape(-1)
224
- scores = [cosine_sim(qv, assets.embs[i]) for i in idxs]
225
- pairs = sorted(zip(scores, idxs), reverse=True)
226
- top_texts = [assets.texts[i] for _, i in pairs[:TOP_K_FINAL]]
227
- log.info(f"[RETRIEVE] subject={subject_key} | top={len(top_texts)}")
228
- return top_texts
229
-
230
- def pick_best_sentences(query: str, chunks: List[str], top_k: int = 5) -> List[str]:
231
- if not chunks: return []
232
- qv = encode_query_exact(query).reshape(-1)
233
  cands: List[Tuple[float, str]] = []
234
  for ch in chunks:
235
- for s in split_sentences(ch):
236
- sv = encode_query_exact(s).reshape(-1)
237
- cos = cosine_sim(qv, sv)
238
  ovl = lexical_overlap(query, s)
239
- penalty = 0.1 if len(s) < 50 else 0.0
240
- score = 0.7 * cos + 0.3 * ovl - penalty
241
- if score >= MIN_HYBRID:
242
- cands.append((score, s))
 
 
 
243
  cands.sort(key=lambda x: x[0], reverse=True)
244
  return [s for _, s in cands[:top_k]]
245
 
 
 
246
  def build_prompt(user_query: str, sentences: List[str]) -> str:
247
  block = "\n".join(f"- {clean_prefix(s)}" for s in sentences)
248
  system = (
249
- "- Gunakan HANYA daftar kalimat fakta berikut sebagai sumber.\n"
250
- "- Jika tidak ada kalimat yang menjawab, balas: maap pengetahuan tidak ada dalam database\n"
251
- "- Jawab TEPAT 1 kalimat, ringkas, Bahasa Indonesia baku.\n"
252
- "- DILARANG menulis frasa meta seperti 'berdasarkan', 'menurut', 'merujuk', atau 'bersumber'."
 
 
 
 
 
 
 
253
  )
254
- return f"""{system}
255
- KALIMAT SUMBER:
256
- {block}
257
- PERTANYAAN:
258
- {user_query}
259
- JAWAB (1 kalimat saja):
260
- """
261
-
262
- @lru_cache(maxsize=512)
263
  def validate_input_cached(q: str) -> bool:
264
  try:
265
  return validate_input(q)
@@ -270,7 +304,7 @@ def validate_input_cached(q: str) -> bool:
270
  # ========= AUTH (POSTGRES) =========
271
  from werkzeug.security import generate_password_hash, check_password_hash
272
  from sqlalchemy import create_engine, Column, Integer, String, Text, Boolean, func, or_
273
- from sqlalchemy.orm import sessionmaker, scoped_session, declarative_base
274
 
275
  POSTGRES_URL = os.environ.get("POSTGRES_URL")
276
  if not POSTGRES_URL:
@@ -282,25 +316,26 @@ Base = declarative_base()
282
 
283
  class User(Base):
284
  __tablename__ = "users"
285
- id = Column(Integer, primary_key=True)
286
- username = Column(String(50), unique=True, nullable=False, index=True)
287
- email = Column(String(120), unique=True, nullable=False, index=True)
288
- password = Column(Text, nullable=False)
289
  is_active = Column(Boolean, default=True, nullable=False)
290
  is_admin = Column(Boolean, default=False, nullable=False)
291
 
292
  class ChatHistory(Base):
293
  __tablename__ = "chat_history"
294
- id = Column(Integer, primary_key=True)
295
- user_id = Column(Integer, nullable=False, index=True)
296
  subject_key = Column(String(50), nullable=False, index=True)
297
- role = Column(String(10), nullable=False)
298
- message = Column(Text, nullable=False)
299
- timestamp = Column(Integer, server_default=func.extract("epoch", func.now()))
300
 
301
  Base.metadata.create_all(bind=engine)
302
 
303
  JKT_TZ = ZoneInfo("Asia/Jakarta")
 
304
  @app.template_filter("fmt_ts")
305
  def fmt_ts(epoch_int: int):
306
  try:
@@ -313,14 +348,15 @@ def db():
313
  return SessionLocal()
314
 
315
  def login_required(view_func):
 
316
  def wrapper(*args, **kwargs):
317
  if not session.get("logged_in"):
318
  return redirect(url_for("auth_login"))
319
  return view_func(*args, **kwargs)
320
- wrapper.__name__ = view_func.__name__
321
  return wrapper
322
 
323
  def admin_required(view_func):
 
324
  def wrapper(*args, **kwargs):
325
  if not session.get("logged_in"):
326
  return redirect(url_for("auth_login"))
@@ -328,10 +364,8 @@ def admin_required(view_func):
328
  flash("Hanya admin yang boleh mengakses halaman itu.", "error")
329
  return redirect(url_for("subjects"))
330
  return view_func(*args, **kwargs)
331
- wrapper.__name__ = view_func.__name__
332
  return wrapper
333
 
334
-
335
  # ========= ROUTES =========
336
  @app.route("/")
337
  def root():
@@ -341,38 +375,30 @@ def root():
341
  def auth_login():
342
  if request.method == "POST":
343
  identity = (
344
- request.form.get("identity")
345
- or request.form.get("email")
346
- or request.form.get("username")
347
- or ""
348
  ).strip().lower()
349
  pw_input = (request.form.get("password") or "").strip()
350
-
351
  if not identity or not pw_input:
352
  flash("Mohon isi email/username dan password.", "error")
353
  return render_template("login.html"), 400
354
-
355
  s = db()
356
  try:
357
  user = (
358
  s.query(User)
359
- .filter(or_(func.lower(User.username) == identity,
360
- func.lower(User.email) == identity))
361
- .first()
362
  )
363
  log.info(f"[LOGIN] identity='{identity}' found={bool(user)} active={getattr(user,'is_active',None)}")
364
  ok = bool(user and user.is_active and check_password_hash(user.password, pw_input))
365
  finally:
366
  s.close()
367
-
368
  if not ok:
369
  flash("Identitas atau password salah.", "error")
370
  return render_template("login.html"), 401
371
-
372
  session["logged_in"] = True
373
- session["user_id"] = user.id
374
- session["username"] = user.username
375
- session["is_admin"] = bool(user.is_admin)
376
  log.info(f"[LOGIN] OK user_id={user.id}; session set.")
377
  return redirect(url_for("subjects"))
378
  return render_template("login.html")
@@ -386,7 +412,6 @@ def whoami():
386
  "is_admin": session.get("is_admin"),
387
  }
388
 
389
-
390
  @app.route("/auth/register", methods=["GET", "POST"])
391
  def auth_register():
392
  if request.method == "POST":
@@ -407,9 +432,8 @@ def auth_register():
407
  try:
408
  existed = (
409
  s.query(User)
410
- .filter(or_(func.lower(User.username) == username,
411
- func.lower(User.email) == email))
412
- .first()
413
  )
414
  if existed:
415
  flash("Username/Email sudah terpakai.", "error")
@@ -431,14 +455,12 @@ def auth_logout():
431
  def about():
432
  return render_template("about.html")
433
 
434
-
435
  @app.route("/subjects")
436
  @login_required
437
  def subjects():
438
  log.info(f"[SESSION DEBUG] logged_in={session.get('logged_in')} user_id={session.get('user_id')}")
439
  return render_template("home.html", subjects=SUBJECTS)
440
 
441
-
442
  @app.route("/chat/<subject_key>")
443
  @login_required
444
  def chat_subject(subject_key: str):
@@ -446,39 +468,41 @@ def chat_subject(subject_key: str):
446
  return redirect(url_for("subjects"))
447
  session["subject_selected"] = subject_key
448
  label = SUBJECTS[subject_key]["label"]
449
-
450
  s = db()
451
  try:
452
  uid = session.get("user_id")
453
  rows = (
454
  s.query(ChatHistory)
455
- .filter_by(user_id=uid, subject_key=subject_key)
456
- .order_by(ChatHistory.id.asc())
457
- .all()
458
  )
459
  history = [{"role": r.role, "message": r.message} for r in rows]
460
  finally:
461
  s.close()
462
-
463
  return render_template("chat.html", subject=subject_key, subject_label=label, history=history)
464
 
465
  @app.route("/health")
466
  def health():
467
- return jsonify({"ok": True, "encoder_loaded": ENCODER_MODEL is not None, "llm_loaded": LLM is not None})
 
 
 
 
 
 
 
468
 
469
  @app.route("/ask/<subject_key>", methods=["POST"])
470
  @login_required
471
  def ask(subject_key: str):
472
  if subject_key not in SUBJECTS:
473
  return jsonify({"ok": False, "error": "invalid subject"}), 400
474
-
475
- # pastikan model siap saat request (lazy)
476
  warmup_models()
477
-
478
  t0 = time.perf_counter()
479
- data = request.get_json(silent=True) or {}
480
- query = (data.get("message") or "").strip()
481
 
 
 
482
  if not query:
483
  return jsonify({"ok": False, "error": "empty query"}), 400
484
  if not validate_input_cached(query):
@@ -495,71 +519,112 @@ def ask(subject_key: str):
495
  if best < MIN_COSINE:
496
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
497
 
498
- chunks = retrieve_rerank_cosine(query, subject_key)
499
  if not chunks:
500
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
501
- sentences = pick_best_sentences(query, chunks, top_k=5)
 
502
  if not sentences:
503
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
504
 
505
  prompt = build_prompt(query, sentences)
506
 
507
  try:
508
- answer = generate(
509
- LLM, prompt,
510
- max_tokens=64, temperature=0.2, top_p=1.0,
511
- stop=["\n\n", "\n###", "###", "\nUser:",
512
- "Berdasarkan", "berdasarkan", "Menurut", "menurut",
513
- "Merujuk", "merujuk", "Mengacu", "mengacu", "Bersumber", "bersumber"]
514
- ).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  except Exception as e:
516
  log.exception(f"[LLM] generate error: {e}")
517
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
518
 
 
519
  m = re.search(r"(.+?[.!?])(\s|$)", answer)
520
  answer = (m.group(1) if m else answer).strip()
521
  answer = strip_meta_sentence(answer)
522
 
523
- # === Simpan ke history ===
524
  try:
525
  s = db()
526
  uid = session.get("user_id")
527
  s.add_all([
528
  ChatHistory(user_id=uid, subject_key=subject_key, role="user", message=query),
529
- ChatHistory(user_id=uid, subject_key=subject_key, role="bot", message=answer)
530
  ])
531
  s.commit()
532
  except Exception as e:
533
  log.exception(f"[DB] gagal simpan chat history: {e}")
534
  finally:
535
- s.close()
 
 
 
536
 
537
  if not answer or len(answer) < 2:
538
  answer = FALLBACK_TEXT
539
 
540
  if ENABLE_PROFILING:
541
- log.info({"latency_total": time.perf_counter() - t0, "subject": subject_key, "faiss_best": best})
 
 
 
 
542
 
543
  return jsonify({"ok": True, "answer": answer})
544
 
545
- # ===== Admin views & delete actions (tetap) =====
546
- from sqlalchemy.orm import Session
547
  @app.route("/admin")
548
  @admin_required
549
  def admin_dashboard():
550
  s = db()
551
  try:
552
- total_users = s.query(func.count(User.id)).scalar() or 0
553
- total_active = s.query(func.count(User.id)).filter(User.is_active.is_(True)).scalar() or 0
554
- total_admins = s.query(func.count(User.id)).filter(User.is_admin.is_(True)).scalar() or 0
555
- total_msgs = s.query(func.count(ChatHistory.id)).scalar() or 0
556
  finally:
557
  s.close()
558
- return render_template("admin_dashboard.html",
559
- total_users=total_users,
560
- total_active=total_active,
561
- total_admins=total_admins,
562
- total_msgs=total_msgs)
563
 
564
  @app.route("/admin/users")
565
  @admin_required
@@ -571,59 +636,39 @@ def admin_users():
571
  try:
572
  base = s.query(User)
573
  if q:
574
- base = base.filter(or_(
575
- func.lower(User.username).like(f"%{q}%"),
576
- func.lower(User.email).like(f"%{q}%")
577
- ))
578
  total = base.count()
579
- users = (base
580
- .order_by(User.id.asc())
581
- .offset((page - 1) * per_page)
582
- .limit(per_page)
583
- .all())
584
  user_ids = [u.id for u in users] or [-1]
585
- counts = dict(s.query(ChatHistory.user_id, func.count(ChatHistory.id))
586
- .filter(ChatHistory.user_id.in_(user_ids))
587
- .group_by(ChatHistory.user_id)
588
- .all())
589
  finally:
590
  s.close()
591
- return render_template("admin_users.html",
592
- users=users, counts=counts,
593
- q=q, page=page, per_page=per_page, total=total)
594
 
595
  @app.route("/admin/history")
596
  @admin_required
597
  def admin_history():
598
- q = (request.args.get("q") or "").strip().lower()
599
- username = (request.args.get("username") or "").strip().lower()
600
- subject = (request.args.get("subject") or "").strip().lower()
601
- role = (request.args.get("role") or "").strip().lower()
602
- page = max(int(request.args.get("page", 1)), 1)
603
- per_page = min(max(int(request.args.get("per_page", 30)), 5), 200)
604
-
605
  s = db()
606
  try:
607
  base = (s.query(ChatHistory, User).join(User, User.id == ChatHistory.user_id))
608
  if q:
609
  base = base.filter(func.lower(ChatHistory.message).like(f"%{q}%"))
610
  if username:
611
- base = base.filter(or_(
612
- func.lower(User.username) == username,
613
- func.lower(User.email) == username
614
- ))
615
  if subject:
616
  base = base.filter(func.lower(ChatHistory.subject_key) == subject)
617
  if role in ("user", "bot"):
618
  base = base.filter(ChatHistory.role == role)
619
  total = base.count()
620
- rows = (base.order_by(ChatHistory.id.desc())
621
- .offset((page - 1) * per_page)
622
- .limit(per_page)
623
- .all())
624
  finally:
625
  s.close()
626
-
627
  items = [{
628
  "id": r.ChatHistory.id,
629
  "username": r.User.username,
@@ -633,11 +678,7 @@ def admin_history():
633
  "message": r.ChatHistory.message,
634
  "timestamp": r.ChatHistory.timestamp,
635
  } for r in rows]
636
-
637
- return render_template("admin_history.html",
638
- items=items, subjects=SUBJECTS,
639
- q=q, username=username, subject=subject, role=role,
640
- page=page, per_page=per_page, total=total)
641
 
642
  def _is_last_admin(s: Session) -> bool:
643
  return (s.query(func.count(User.id)).filter(User.is_admin.is_(True)).scalar() or 0) <= 1
@@ -708,4 +749,4 @@ def admin_delete_chat(chat_id: int):
708
  # ========= ENTRY =========
709
  if __name__ == "__main__":
710
  port = int(os.environ.get("PORT", 7860))
711
- app.run(host="0.0.0.0", port=port, debug=False)
 
1
+ # app.py (HF Spaces CPU-Optimized)
2
+ # RAG sekolah super hemat CPU:
3
+ # - Default model: DeepSeek-R1-Distill-Qwen-7B Q4_K_M (GGUF, overridable via GGUF_FILENAME) + ctx 1024
4
+ # - Retrieval cepat: FAISS top-12 → pilih kalimat pakai lexical overlap (tanpa encode per-kalimat)
5
+ # - Encoder dipakai HANYA untuk query & FAISS (1x per request)
6
+ # - Jawaban final lewat <final>...</final>, stop di </final>, retry kalau kosong/ellipsis
7
+ # - Admin + Auth Postgres tetap sama
8
+
9
  import os, json, re, time, logging
10
+ from functools import lru_cache, wraps
11
  from typing import Dict, List, Tuple
12
  from dataclasses import dataclass
13
  from datetime import datetime
14
  from zoneinfo import ZoneInfo
15
  from pathlib import Path
16
 
17
+ from flask import (
18
+ Flask, render_template, request, redirect, url_for, session, jsonify, flash
19
+ )
20
+
21
  import numpy as np
22
  import faiss
23
  import torch
 
29
  # ========= ENV & LOGGING =========
30
  os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
31
  os.environ.setdefault("OMP_NUM_THREADS", "1")
32
+ try:
33
+ torch.set_num_threads(int(os.environ.get("NUM_THREADS", "3"))) # 3 thread cukup di CPU Spaces
34
+ torch.set_num_interop_threads(1)
35
+ except Exception:
36
+ pass
37
+
38
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
39
  log = logging.getLogger("rag-app")
40
 
41
+ # ========= IMPORT EKSTERNAL (wrapper & guardrail) =========
42
+ from Guardrail import validate_input # -> bool
43
  from Model import load_model, generate # -> llama.cpp wrapper
44
 
45
+ # ========= PATH ROOT =========
46
  BASE_DIR = Path(__file__).resolve().parent
47
 
48
+ # ========= KONFIG MODEL & RAG (di-tune untuk CPU) =========
49
+ GGUF_DEFAULT = "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf" # kecil & cepat; upload ke /models
50
+ MODEL_PATH = str(BASE_DIR / "models" / os.getenv("GGUF_FILENAME", GGUF_DEFAULT))
51
+ CTX_WINDOW = int(os.environ.get("CTX_WINDOW", 1024))
52
+ N_GPU_LAYERS = int(os.environ.get("N_GPU_LAYERS", 0))
53
+ N_THREADS = int(os.environ.get("NUM_THREADS", 3))
54
 
55
+ ENCODER_NAME = os.environ.get("ENCODER_NAME", "intfloat/multilingual-e5-large")
56
  ENCODER_DEVICE = torch.device("cpu")
57
 
58
+ # Dataset sudah ada di Space → path RELATIF (samakan dengan struktur kamu)
59
  SUBJECTS: Dict[str, Dict[str, str]] = {
60
  "ipas": {
61
+ "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Ipas" / "IPA_index.index"),
62
+ "chunks": str(BASE_DIR / "Dataset" / "Ipas" / "Chunk" / "ipas_chunks.json"),
63
+ "embeddings": str(BASE_DIR / "Dataset" / "Ipas" / "Embedd"/ "ipas_embeddings.npy"),
64
+ "label": "IPAS",
65
+ "desc": "Ilmu Pengetahuan Alam dan Sosial"
66
  },
67
  "penjas": {
68
+ "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Penjas" / "PENJAS_index.index"),
69
+ "chunks": str(BASE_DIR / "Dataset" / "Penjas" / "Chunk" / "penjas_chunks.json"),
70
+ "embeddings": str(BASE_DIR / "Dataset" / "Penjas" / "Embedd" / "penjas_embeddings.npy"),
71
+ "label": "PJOK",
72
+ "desc": "Pendidikan Jasmani, Olahraga, dan Kesehatan"
73
  },
74
  "pancasila": {
75
+ "index": str(BASE_DIR / "Rag-Pipeline" / "Vektor Database" / "Pancasila" / "PANCASILA_index.index"),
76
+ "chunks": str(BASE_DIR / "Dataset" / "Pancasila" / "Chunk" / "pancasila_chunks.json"),
77
+ "embeddings": str(BASE_DIR / "Dataset" / "Pancasila" / "Embedd" / "pancasila_embeddings.npy"),
78
+ "label": "PANCASILA",
79
+ "desc": "Pendidikan Pancasila dan Kewarganegaraan"
80
  }
81
  }
82
 
83
+ # Threshold & parameter cepat
84
+ TOP_K_FAISS = int(os.environ.get("TOP_K_FAISS", 12))
85
+ TOP_K_FINAL = int(os.environ.get("TOP_K_FINAL", 6))
86
+ MIN_COSINE = float(os.environ.get("MIN_COSINE", 0.80)) # lebih longgar biar jarang fallback
87
+ MIN_LEXICAL = float(os.environ.get("MIN_LEXICAL", 0.10))
88
+ FALLBACK_TEXT = os.environ.get("FALLBACK_TEXT", "maap pengetahuan tidak ada dalam database")
89
+ GUARDRAIL_BLOCK_TEXT = os.environ.get("GUARDRAIL_BLOCK_TEXT", "maap, pertanyaan ditolak oleh guardrail")
90
+ ENABLE_PROFILING = os.environ.get("ENABLE_PROFILING", "false").lower() == "true"
 
91
 
92
  # ========= APP =========
93
  app = Flask(__name__)
94
  app.secret_key = os.environ.get("FLASK_SECRET_KEY", "dev-secret-please-change")
95
 
96
  from werkzeug.middleware.proxy_fix import ProxyFix
 
97
  app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1)
 
98
  app.config.update(
99
  SESSION_COOKIE_NAME="session",
100
  SESSION_COOKIE_SAMESITE="None",
 
104
  PREFERRED_URL_SCHEME="https",
105
  )
106
 
107
+ # ========= GLOBALS =========
 
108
  ENCODER_TOKENIZER = None
109
+ ENCODER_MODEL = None
110
+ LLM = None
111
 
112
  @dataclass(frozen=True)
113
  class SubjectAssets:
 
115
  texts: List[str]
116
  embs: np.ndarray
117
 
118
+ # ========= TEKS UTIL =========
119
  STOPWORDS_ID = {
120
  "yang","dan","atau","pada","di","ke","dari","itu","ini","adalah","dengan",
121
  "untuk","serta","sebagai","oleh","dalam","akan","kamu","apa","karena",
122
  "agar","sehingga","terhadap","dapat","juga","para","diri",
123
  }
124
  TOKEN_RE = re.compile(r"[A-Za-zÀ-ÖØ-öø-ÿ]+", re.UNICODE)
125
+
@lru_cache(maxsize=4096)
def _tok_cached(word: str) -> str:
    """Return ``word`` lowercased; results are memoised (at most 4096 entries)."""
    lowered = word.lower()
    return lowered


def tok_id(text: str) -> List[str]:
    """Split ``text`` into lowercase word tokens with stopwords removed.

    Args:
        text: Input string; a falsy value (``None`` or ``""``) is treated as
            the empty string.

    Returns:
        The lowercased matches of ``TOKEN_RE`` in order of appearance,
        excluding every token that is a member of ``STOPWORDS_ID``.
    """
    tokens: List[str] = []
    for raw_word in TOKEN_RE.findall(text or ""):
        lowered = _tok_cached(raw_word)
        if lowered in STOPWORDS_ID:
            continue
        tokens.append(lowered)
    return tokens
132
+
def lexical_overlap(query: str, sent: str) -> float:
    """Return the Jaccard overlap between the token sets of two strings.

    Both strings are tokenised with ``tok_id``. The score is the size of the
    intersection divided by the size of the union of the two token sets.

    Args:
        query: The user question.
        sent: The candidate sentence to compare against.

    Returns:
        A value in ``[0.0, 1.0]``; ``0.0`` when either side has no tokens.
    """
    query_tokens = set(tok_id(query))
    sent_tokens = set(tok_id(sent))
    if not (query_tokens and sent_tokens):
        return 0.0
    shared = len(query_tokens & sent_tokens)
    combined = len(query_tokens | sent_tokens)
    return shared / max(1, combined)
138
 
139
  QUESTION_LIKE_RE = re.compile(r"(^\s*(apa|mengapa|bagaimana|sebutkan|jelaskan)\b|[?]$)", re.IGNORECASE)
 
150
 
def clean_prefix(t: str) -> str:
    """Strip leading ``META_PREFIX_RE`` matches from ``t``.

    The substitution is applied repeatedly so that stacked prefixes are
    removed too, but at most three passes are made.

    Args:
        t: Text to clean; a falsy value is treated as the empty string.

    Returns:
        The whitespace-trimmed text after the last pass that changed it.
    """
    cleaned = (t or "").strip()
    passes_left = 3
    while passes_left > 0:
        passes_left -= 1
        candidate = META_PREFIX_RE.sub("", cleaned).lstrip()
        if candidate == cleaned:
            # Nothing was removed in this pass, so further passes are no-ops.
            return cleaned
        cleaned = candidate
    return cleaned
159
 
 
161
  s = clean_prefix(s or "")
162
  if re.match(r"^\s*(berdasarkan|menurut|merujuk|mengacu|bersumber|dari)\b", s, re.IGNORECASE):
163
  s = re.sub(r"^\s*[^,.;!?]*[,.;!?]\s*", "", s) or s
164
+ s = clean_prefix(s)
165
  return s.strip()
166
 
167
  SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")
168
+
def split_sentences_fast(text: str) -> List[str]:
    """Split ``text`` into cleaned candidate sentences without any encoding.

    Each piece produced by ``SENT_SPLIT_RE`` is trimmed, passed through
    ``clean_prefix`` and given a trailing period if it lacks terminal
    punctuation. A piece is then discarded when it is empty, matches
    ``QUESTION_LIKE_RE`` or ``INSTRUCTION_RE``, or is shorter than 12
    characters.

    Args:
        text: Chunk text; a falsy value is treated as the empty string.

    Returns:
        The surviving sentences in their original order.
    """
    sentences: List[str] = []
    for piece in SENT_SPLIT_RE.split(text or ""):
        sentence = clean_prefix((piece or "").strip())
        if sentence and not sentence.endswith((".", "!", "?")):
            sentence = sentence + "."
        keep = (
            bool(sentence)
            and QUESTION_LIKE_RE.search(sentence) is None
            and INSTRUCTION_RE.search(sentence) is None
            and len(sentence) >= 12
        )
        if keep:
            sentences.append(sentence)
    return sentences
186
 
187
+ # ========= MODEL WARMUP =========
188
+
189
  def warmup_models():
190
  global ENCODER_TOKENIZER, ENCODER_MODEL, LLM
191
  if ENCODER_TOKENIZER is None or ENCODER_MODEL is None:
 
193
  ENCODER_TOKENIZER = AutoTokenizer.from_pretrained(ENCODER_NAME)
194
  ENCODER_MODEL = AutoModel.from_pretrained(ENCODER_NAME).to(ENCODER_DEVICE).eval()
195
  if LLM is None:
196
+ log.info(f"[INIT] Load LLM: {MODEL_PATH} | ctx={CTX_WINDOW} | threads={N_THREADS}")
197
  LLM = load_model(MODEL_PATH, n_ctx=CTX_WINDOW, n_gpu_layers=N_GPU_LAYERS, n_threads=N_THREADS)
198
 
199
+ # ========= ASSETS =========
200
+
@lru_cache(maxsize=8)
def load_subject_assets(subject_key: str) -> "SubjectAssets":
    """Load the FAISS index, chunk texts and embeddings for one subject.

    Results are memoised per ``subject_key`` (up to 8 subjects), so the files
    are read from disk only on the first successful call for each subject.

    Args:
        subject_key: A key of the ``SUBJECTS`` configuration mapping.

    Returns:
        A ``SubjectAssets`` holding the index, the list of chunk texts and the
        embedding array.

    Raises:
        ValueError: ``subject_key`` is not present in ``SUBJECTS``.
        FileNotFoundError: One of the configured asset paths does not exist;
            the exception argument is the missing path.
        RuntimeError: The index size and the number of embedding rows differ.
    """
    if subject_key not in SUBJECTS:
        raise ValueError(f"Unknown subject: {subject_key}")
    cfg = SUBJECTS[subject_key]
    log.info(f"[ASSETS] Loading subject={subject_key} | index={cfg['index']}")

    # Check all three paths up front so the error names the missing file
    # instead of surfacing as a library-specific read failure.
    for asset in ("index", "chunks", "embeddings"):
        if not os.path.exists(cfg[asset]):
            raise FileNotFoundError(cfg[asset])

    index = faiss.read_index(cfg["index"])
    with open(cfg["chunks"], "r", encoding="utf-8") as f:
        # Items without a "text" key become empty strings rather than failing.
        texts = [it.get("text", "") for it in json.load(f)]
    embs = np.load(cfg["embeddings"])  # presumably (N, dim) - confirm against the build script

    if index.ntotal != len(embs):
        raise RuntimeError(f"Mismatch ntotal({index.ntotal}) vs emb({len(embs)})")
    if len(texts) != len(embs):
        # Texts are looked up by the ids FAISS returns, so a differing count
        # means answers may be built from the wrong chunk. Only warn: callers
        # already guard against out-of-range ids, so this stays best-effort.
        log.warning(
            f"[ASSETS] subject={subject_key} has {len(texts)} chunk texts "
            f"but {len(embs)} embeddings; retrieval may be misaligned"
        )
    return SubjectAssets(index=index, texts=texts, embs=embs)
220
 
221
+ # ========= ENCODER =========
222
+
223
  @torch.inference_mode()
224
+ @lru_cache(maxsize=1024)
225
  def encode_query_exact(text: str) -> np.ndarray:
226
  toks = ENCODER_TOKENIZER(text, padding=True, truncation=True, return_tensors="pt").to(ENCODER_DEVICE)
227
  out = ENCODER_MODEL(**toks)
 
230
 
def cosine_sim(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of two vectors.

    Both inputs are flattened and promoted to float64 before any arithmetic.
    Without the promotion, half-precision embeddings overflow inside the dot
    product and norms (yielding ``nan``), and float32 inputs lose precision
    close to the similarity threshold.

    Args:
        a: First vector (any array-like; flattened to 1-D).
        b: Second vector (any array-like; flattened to 1-D). Must contain the
            same number of elements as ``a``.

    Returns:
        The cosine similarity as a Python float. A zero vector on either side
        gives ``0.0`` because a small epsilon is added to the denominator.
    """
    a = np.asarray(a, dtype=np.float64).reshape(-1)
    b = np.asarray(b, dtype=np.float64).reshape(-1)
    # Epsilon keeps the division defined when either vector has zero norm.
    denom = (np.linalg.norm(a) * np.linalg.norm(b)) + 1e-12
    return float(np.dot(a, b) / denom)
235
+
236
+ # ========= RETRIEVAL CEPAT =========
237
 
238
  def best_cosine_from_faiss(query: str, subject_key: str) -> float:
239
  assets = load_subject_assets(subject_key)
 
246
  best = max(best, cosine_sim(qv, assets.embs[i]))
247
  return best
248
 
def retrieve_top_chunks(query: str, subject_key: str) -> List[str]:
    """Return the chunk texts nearest to ``query`` for one subject.

    The query is encoded once and searched against the subject's index for
    ``TOP_K_FAISS`` neighbours. Ids that fall outside the text list are
    dropped, and the first ``TOP_K_FINAL`` remaining ids are kept in the
    order the index returned them.

    Args:
        query: The user question.
        subject_key: A key of the ``SUBJECTS`` configuration mapping.

    Returns:
        Up to ``TOP_K_FINAL`` chunk texts; empty when no valid id is returned.
    """
    assets = load_subject_assets(subject_key)
    query_vec = encode_query_exact(query)
    _distances, neighbour_ids = assets.index.search(query_vec, TOP_K_FAISS)

    text_count = len(assets.texts)
    valid_ids = []
    for hit in neighbour_ids[0]:
        if 0 <= hit < text_count:
            valid_ids.append(hit)

    selected = valid_ids[:TOP_K_FINAL]
    return [assets.texts[i] for i in selected]
255
+
def pick_best_sentences_fast(query: str, chunks: List[str], top_k: int = 4) -> List[str]:
    """Rank sentences from ``chunks`` by lexical overlap with ``query``.

    No per-sentence encoding is performed. Each sentence from
    ``split_sentences_fast`` is scored with ``lexical_overlap``; sentences
    scoring below ``MIN_LEXICAL`` are dropped, and sentences of 50 to 220
    characters get a 0.05 bonus.

    Args:
        query: The user question.
        chunks: Retrieved chunk texts to mine for sentences.
        top_k: Maximum number of sentences to return.

    Returns:
        The highest-scoring sentences, best first; ties keep the order in
        which the sentences were encountered.
    """
    scored: List[Tuple[float, str]] = []
    for chunk in chunks:
        for sentence in split_sentences_fast(chunk):
            overlap = lexical_overlap(query, sentence)
            if not (overlap < MIN_LEXICAL):
                # Small bonus for sentences of a reasonable length.
                bonus = 0.05 if 50 <= len(sentence) <= 220 else 0.0
                scored.append((overlap + bonus, sentence))
    ranked = sorted(scored, key=lambda pair: pair[0], reverse=True)
    return [sentence for _, sentence in ranked[:top_k]]
271
 
272
+ # ========= PROMPT =========
273
+
274
  def build_prompt(user_query: str, sentences: List[str]) -> str:
275
  block = "\n".join(f"- {clean_prefix(s)}" for s in sentences)
276
  system = (
277
+ "Kamu asisten RAG.\n"
278
+ f"- Jika tidak ada kalimat yang relevan, tulis persis: {FALLBACK_TEXT}\n"
279
+ "- Jawab TEPAT 1 kalimat, ringkas, Bahasa Indonesia baku (≥ 6 kata).\n"
280
+ "- Tanpa frasa meta (berdasarkan/menurut/merujuk/mengacu/bersumber).\n"
281
+ "- Tulis jawaban final di dalam tag <final>Jawaban.</final> dan jangan menulis apa pun setelah </final>."
282
+ )
283
+ fewshot = (
284
+ "Contoh format: \n"
285
+ "KALIMAT SUMBER:\n- Air memuai saat dipanaskan.\n"
286
+ "PERTANYAAN: Apa yang terjadi pada air saat dipanaskan?\n"
287
+ "<final>Air akan memuai ketika dipanaskan.</final>\n"
288
  )
289
+ return (
290
+ f"{system}\n\n{fewshot}\n"
291
+ f"KALIMAT SUMBER:\n{block}\n\n"
292
+ f"PERTANYAAN: {user_query}\n"
293
+ f"TULIS JAWABAN DI DALAM <final>...</final> SAJA:"
294
+ )
295
+
296
+ @lru_cache(maxsize=1024)
 
297
  def validate_input_cached(q: str) -> bool:
298
  try:
299
  return validate_input(q)
 
304
  # ========= AUTH (POSTGRES) =========
305
  from werkzeug.security import generate_password_hash, check_password_hash
306
  from sqlalchemy import create_engine, Column, Integer, String, Text, Boolean, func, or_
307
+ from sqlalchemy.orm import sessionmaker, scoped_session, declarative_base, Session
308
 
309
  POSTGRES_URL = os.environ.get("POSTGRES_URL")
310
  if not POSTGRES_URL:
 
316
 
317
  class User(Base):
318
  __tablename__ = "users"
319
+ id = Column(Integer, primary_key=True)
320
+ username = Column(String(50), unique=True, nullable=False, index=True)
321
+ email = Column(String(120), unique=True, nullable=False, index=True)
322
+ password = Column(Text, nullable=False)
323
  is_active = Column(Boolean, default=True, nullable=False)
324
  is_admin = Column(Boolean, default=False, nullable=False)
325
 
326
  class ChatHistory(Base):
327
  __tablename__ = "chat_history"
328
+ id = Column(Integer, primary_key=True)
329
+ user_id = Column(Integer, nullable=False, index=True)
330
  subject_key = Column(String(50), nullable=False, index=True)
331
+ role = Column(String(10), nullable=False)
332
+ message = Column(Text, nullable=False)
333
+ timestamp = Column(Integer, server_default=func.extract("epoch", func.now()))
334
 
335
  Base.metadata.create_all(bind=engine)
336
 
337
  JKT_TZ = ZoneInfo("Asia/Jakarta")
338
+
339
  @app.template_filter("fmt_ts")
340
  def fmt_ts(epoch_int: int):
341
  try:
 
348
  return SessionLocal()
349
 
350
  def login_required(view_func):
351
+ @wraps(view_func)
352
  def wrapper(*args, **kwargs):
353
  if not session.get("logged_in"):
354
  return redirect(url_for("auth_login"))
355
  return view_func(*args, **kwargs)
 
356
  return wrapper
357
 
358
  def admin_required(view_func):
359
+ @wraps(view_func)
360
  def wrapper(*args, **kwargs):
361
  if not session.get("logged_in"):
362
  return redirect(url_for("auth_login"))
 
364
  flash("Hanya admin yang boleh mengakses halaman itu.", "error")
365
  return redirect(url_for("subjects"))
366
  return view_func(*args, **kwargs)
 
367
  return wrapper
368
 
 
369
  # ========= ROUTES =========
370
  @app.route("/")
371
  def root():
 
375
  def auth_login():
376
  if request.method == "POST":
377
  identity = (
378
+ request.form.get("identity") or request.form.get("email") or request.form.get("username") or ""
 
 
 
379
  ).strip().lower()
380
  pw_input = (request.form.get("password") or "").strip()
 
381
  if not identity or not pw_input:
382
  flash("Mohon isi email/username dan password.", "error")
383
  return render_template("login.html"), 400
 
384
  s = db()
385
  try:
386
  user = (
387
  s.query(User)
388
+ .filter(or_(func.lower(User.username) == identity, func.lower(User.email) == identity))
389
+ .first()
 
390
  )
391
  log.info(f"[LOGIN] identity='{identity}' found={bool(user)} active={getattr(user,'is_active',None)}")
392
  ok = bool(user and user.is_active and check_password_hash(user.password, pw_input))
393
  finally:
394
  s.close()
 
395
  if not ok:
396
  flash("Identitas atau password salah.", "error")
397
  return render_template("login.html"), 401
 
398
  session["logged_in"] = True
399
+ session["user_id"] = user.id
400
+ session["username"] = user.username
401
+ session["is_admin"] = bool(user.is_admin)
402
  log.info(f"[LOGIN] OK user_id={user.id}; session set.")
403
  return redirect(url_for("subjects"))
404
  return render_template("login.html")
 
412
  "is_admin": session.get("is_admin"),
413
  }
414
 
 
415
  @app.route("/auth/register", methods=["GET", "POST"])
416
  def auth_register():
417
  if request.method == "POST":
 
432
  try:
433
  existed = (
434
  s.query(User)
435
+ .filter(or_(func.lower(User.username) == username, func.lower(User.email) == email))
436
+ .first()
 
437
  )
438
  if existed:
439
  flash("Username/Email sudah terpakai.", "error")
 
455
  def about():
456
  return render_template("about.html")
457
 
 
458
  @app.route("/subjects")
459
  @login_required
460
  def subjects():
461
  log.info(f"[SESSION DEBUG] logged_in={session.get('logged_in')} user_id={session.get('user_id')}")
462
  return render_template("home.html", subjects=SUBJECTS)
463
 
 
464
  @app.route("/chat/<subject_key>")
465
  @login_required
466
  def chat_subject(subject_key: str):
 
468
  return redirect(url_for("subjects"))
469
  session["subject_selected"] = subject_key
470
  label = SUBJECTS[subject_key]["label"]
 
471
  s = db()
472
  try:
473
  uid = session.get("user_id")
474
  rows = (
475
  s.query(ChatHistory)
476
+ .filter_by(user_id=uid, subject_key=subject_key)
477
+ .order_by(ChatHistory.id.asc())
478
+ .all()
479
  )
480
  history = [{"role": r.role, "message": r.message} for r in rows]
481
  finally:
482
  s.close()
 
483
  return render_template("chat.html", subject=subject_key, subject_label=label, history=history)
484
 
485
  @app.route("/health")
486
  def health():
487
+ return jsonify({
488
+ "ok": True,
489
+ "encoder_loaded": ENCODER_MODEL is not None,
490
+ "llm_loaded": LLM is not None,
491
+ "model_path": MODEL_PATH,
492
+ "ctx_window": CTX_WINDOW,
493
+ "threads": N_THREADS,
494
+ })
495
 
496
  @app.route("/ask/<subject_key>", methods=["POST"])
497
  @login_required
498
  def ask(subject_key: str):
499
  if subject_key not in SUBJECTS:
500
  return jsonify({"ok": False, "error": "invalid subject"}), 400
 
 
501
  warmup_models()
 
502
  t0 = time.perf_counter()
 
 
503
 
504
+ data = request.get_json(silent=True) or {}
505
+ query = (data.get("message") or "").strip()
506
  if not query:
507
  return jsonify({"ok": False, "error": "empty query"}), 400
508
  if not validate_input_cached(query):
 
519
  if best < MIN_COSINE:
520
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
521
 
522
+ chunks = retrieve_top_chunks(query, subject_key)
523
  if not chunks:
524
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
525
+
526
+ sentences = pick_best_sentences_fast(query, chunks, top_k=4)
527
  if not sentences:
528
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
529
 
530
  prompt = build_prompt(query, sentences)
531
 
532
  try:
533
+ # PASS-1: deterministik & singkat
534
+ raw_answer = generate(
535
+ LLM,
536
+ prompt,
537
+ max_tokens=int(os.environ.get("MAX_TOKENS", 72)),
538
+ temperature=float(os.environ.get("TEMP", 0.0)),
539
+ top_p=1.0,
540
+ stop=["</final>"]
541
+ ) or ""
542
+ raw_answer = raw_answer.strip()
543
+ log.info(f"[LLM] Raw answer repr (pass1): {repr(raw_answer)}")
544
+
545
+ text = re.sub(r"<think\b[^>]*>.*?</think>", "", raw_answer, flags=re.DOTALL | re.IGNORECASE).strip()
546
+ text = re.sub(r"</?think\b[^>]*>", "", text, flags=re.IGNORECASE).strip()
547
+ m_final = re.search(r"<final>\s*(.+)$", text, flags=re.IGNORECASE | re.DOTALL)
548
+ cleaned = (m_final.group(1).strip() if m_final else re.sub(r"<[^>]+>", "", text).strip())
549
+
550
+ def _is_bad(s: str) -> bool:
551
+ s2 = s.strip()
552
+ return (len(re.sub(r"[^A-Za-zÀ-ÖØ-öø-ÿ]+", "", s2)) < 3) or (s2 in {"...", ".", "..", "…"}) or (len(s2.split()) < 6)
553
+
554
+ if _is_bad(cleaned):
555
+ prompt_retry = (
556
+ prompt
557
+ + "\n\nULANGI DENGAN TAAT FORMAT: Tulis satu kalimat faktual tanpa placeholder/ellipsis, minimal 6 kata, mulai huruf kapital dan akhiri titik. Tulis hanya di dalam <final>...</final>."
558
+ )
559
+ raw_answer2 = generate(
560
+ LLM,
561
+ prompt_retry,
562
+ max_tokens=int(os.environ.get("MAX_TOKENS", 72)),
563
+ temperature=0.2,
564
+ top_p=1.0,
565
+ stop=["</final>"]
566
+ ) or ""
567
+ raw_answer2 = raw_answer2.strip()
568
+ log.info(f"[LLM] Raw answer repr (pass2): {repr(raw_answer2)}")
569
+ text2 = re.sub(r"<think\b[^>]*>.*?</think>", "", raw_answer2, flags=re.DOTALL | re.IGNORECASE).strip()
570
+ text2 = re.sub(r"</?think\b[^>]*>", "", text2, flags=re.IGNORECASE).strip()
571
+ m_final2 = re.search(r"<final>\s*(.+)$", text2, flags=re.IGNORECASE | re.DOTALL)
572
+ cleaned2 = (m_final2.group(1).strip() if m_final2 else re.sub(r"<[^>]+>", "", text2).strip())
573
+ cleaned = cleaned2 or cleaned
574
+
575
+ answer = cleaned
576
+
577
  except Exception as e:
578
  log.exception(f"[LLM] generate error: {e}")
579
  return jsonify({"ok": True, "answer": FALLBACK_TEXT})
580
 
581
+ # Ambil 1 kalimat pertama saja
582
  m = re.search(r"(.+?[.!?])(\s|$)", answer)
583
  answer = (m.group(1) if m else answer).strip()
584
  answer = strip_meta_sentence(answer)
585
 
586
+ # Simpan history
587
  try:
588
  s = db()
589
  uid = session.get("user_id")
590
  s.add_all([
591
  ChatHistory(user_id=uid, subject_key=subject_key, role="user", message=query),
592
+ ChatHistory(user_id=uid, subject_key=subject_key, role="bot", message=answer),
593
  ])
594
  s.commit()
595
  except Exception as e:
596
  log.exception(f"[DB] gagal simpan chat history: {e}")
597
  finally:
598
+ try:
599
+ s.close()
600
+ except Exception:
601
+ pass
602
 
603
  if not answer or len(answer) < 2:
604
  answer = FALLBACK_TEXT
605
 
606
  if ENABLE_PROFILING:
607
+ log.info({
608
+ "latency_total": time.perf_counter() - t0,
609
+ "subject": subject_key,
610
+ "faiss_best": best,
611
+ })
612
 
613
  return jsonify({"ok": True, "answer": answer})
614
 
615
+ # ===== Admin =====
 
616
  @app.route("/admin")
617
  @admin_required
618
  def admin_dashboard():
619
  s = db()
620
  try:
621
+ total_users = s.query(func.count(User.id)).scalar() or 0
622
+ total_active = s.query(func.count(User.id)).filter(User.is_active.is_(True)).scalar() or 0
623
+ total_admins = s.query(func.count(User.id)).filter(User.is_admin.is_(True)).scalar() or 0
624
+ total_msgs = s.query(func.count(ChatHistory.id)).scalar() or 0
625
  finally:
626
  s.close()
627
+ return render_template("admin_dashboard.html", total_users=total_users, total_active=total_active, total_admins=total_admins, total_msgs=total_msgs)
 
 
 
 
628
 
629
  @app.route("/admin/users")
630
  @admin_required
 
636
  try:
637
  base = s.query(User)
638
  if q:
639
+ base = base.filter(or_(func.lower(User.username).like(f"%{q}%"), func.lower(User.email).like(f"%{q}%")))
 
 
 
640
  total = base.count()
641
+ users = base.order_by(User.id.asc()).offset((page - 1) * per_page).limit(per_page).all()
 
 
 
 
642
  user_ids = [u.id for u in users] or [-1]
643
+ counts = dict(s.query(ChatHistory.user_id, func.count(ChatHistory.id)).filter(ChatHistory.user_id.in_(user_ids)).group_by(ChatHistory.user_id).all())
 
 
 
644
  finally:
645
  s.close()
646
+ return render_template("admin_users.html", users=users, counts=counts, q=q, page=page, per_page=per_page, total=total)
 
 
647
 
648
  @app.route("/admin/history")
649
  @admin_required
650
  def admin_history():
651
+ q = (request.args.get("q") or "").strip().lower()
652
+ username = (request.args.get("username") or "").strip().lower()
653
+ subject = (request.args.get("subject") or "").strip().lower()
654
+ role = (request.args.get("role") or "").strip().lower()
655
+ page = max(int(request.args.get("page", 1)), 1)
656
+ per_page = min(max(int(request.args.get("per_page", 30)), 5), 200)
 
657
  s = db()
658
  try:
659
  base = (s.query(ChatHistory, User).join(User, User.id == ChatHistory.user_id))
660
  if q:
661
  base = base.filter(func.lower(ChatHistory.message).like(f"%{q}%"))
662
  if username:
663
+ base = base.filter(or_(func.lower(User.username) == username, func.lower(User.email) == username))
 
 
 
664
  if subject:
665
  base = base.filter(func.lower(ChatHistory.subject_key) == subject)
666
  if role in ("user", "bot"):
667
  base = base.filter(ChatHistory.role == role)
668
  total = base.count()
669
+ rows = base.order_by(ChatHistory.id.desc()).offset((page - 1) * per_page).limit(per_page).all()
 
 
 
670
  finally:
671
  s.close()
 
672
  items = [{
673
  "id": r.ChatHistory.id,
674
  "username": r.User.username,
 
678
  "message": r.ChatHistory.message,
679
  "timestamp": r.ChatHistory.timestamp,
680
  } for r in rows]
681
+ return render_template("admin_history.html", items=items, subjects=SUBJECTS, q=q, username=username, subject=subject, role=role, page=page, per_page=per_page, total=total)
 
 
 
 
682
 
683
  def _is_last_admin(s: Session) -> bool:
684
  return (s.query(func.count(User.id)).filter(User.is_admin.is_(True)).scalar() or 0) <= 1
 
749
  # ========= ENTRY =========
750
  if __name__ == "__main__":
751
  port = int(os.environ.get("PORT", 7860))
752
+ app.run(host="0.0.0.0", port=port, debug=False)