Spaces:

korayaggul
/

QA-Quality-Evaluator

Sleeping

App Files Files Community

korayaggul committed on Sep 24

Commit

0217536

verified ·

1 Parent(s): f509ff5

Update app.py

Browse files

Files changed (1) hide show

app.py +145 -101

app.py CHANGED Viewed

@@ -1,143 +1,187 @@
 import json
 import tempfile
-from typing import List, Dict, Any
 import gradio as gr
-from transformers import pipeline
-# --- Lazy init: Space başlarken modeli bir kez yükleyelim
-quality_clf = pipeline("text-classification", model="snorkelai/instruction-response-quality")
-def score_item(item: Dict[str, Any]) -> Dict[str, Any]:
-    """Tek bir QA kaydını skorla ve quality alanını ekle."""
-    q = item.get("question", "")
-    a = item.get("answer", "")
-    text = f"Q: {q}\nA: {a}"
-    pred = quality_clf(text, truncation=True)[0]
-    score = float(pred["score"])
-    if score > 0.75:
-        label = "high"
-    elif score > 0.40:
-        label = "medium"
-    else:
-        label = "low"
-    item["quality"] = {"label": label, "score": round(score, 3)}
-    return item
-def improve_item(item: Dict[str, Any], target: str = "medium") -> Dict[str, Any]:
     """
-    Düşük skorlara basit 'kural tabanlı' iyileştirme (LLM yok; hafif ve ücretsiz).
-    İstersen buraya bir instruct LLM entegre edebiliriz.
     """
-    label = item.get("quality", {}).get("label")
-    if label in ("high",) or target == "none":
-        return item
-    q = item.get("question", "")
-    a = item.get("answer", "")
-    # Basit temizlikler: boşluk, büyük harf, noktalama
-    q2 = q.strip()
-    if q2 and q2[-1] not in "?":
-        q2 += "?"
-    if q2 and q2[0].islower():
-        q2 = q2[0].upper() + q2[1:]
-    a2 = a.strip()
-    if a2 and a2[0].islower():
-        a2 = a2[0].upper() + a2[1:]
-    if a2 and a2[-1] not in ".!?":
-        a2 += "."
-    # Çok kısa cevapları minimal genişletme
-    if len(a2.split()) < 5:
-        a2 = a2 + " This answer has been clarified for brevity and precision."
-    item["question"] = q2
-    item["answer"] = a2
     return item
 def process_json(
-    file,
-    auto_improve: bool,
-    improve_threshold: str
-):
-    # JSON içeriğini yükle (liste veya tek obje destekler)
     data = json.load(open(file.name))
     items: List[Dict[str, Any]] = data if isinstance(data, list) else [data]
-    # Skorla
-    scored: List[Dict[str, Any]] = [score_item(dict(it)) for it in items]
-    # İyileştirme isteğe bağlı
-    if auto_improve:
         def needs_improve(lbl: str) -> bool:
-            if improve_threshold == "low_only":
                 return lbl == "low"
-            elif improve_threshold == "low_and_medium":
                 return lbl in ("low", "medium")
             return False
-        improved = []
-        for it in scored:
-            lbl = it.get("quality", {}).get("label", "low")
-            if needs_improve(lbl):
-                it = improve_item(it)
-                # yeniden skorlayalım ki farkı görelim
-                it = score_item(it)
-            improved.append(it)
-        scored = improved
-    # Özet tablo için küçük bir görünüm (id yoksa index)
     summary = []
-    for idx, it in enumerate(scored):
         summary.append({
-            "id": it.get("id", idx),
-            "quality_label": it["quality"]["label"],
-            "quality_score": it["quality"]["score"],
             "question_preview": (it.get("question") or "")[:120]
         })
-    # İndirilebilir JSON oluştur
-    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w")
     json.dump(scored, tmp, indent=2, ensure_ascii=False)
-    tmp.flush()
-    tmp.close()
-    # Büyük JSON’u rahat okumak için Code kutusu
-    pretty = json.dumps(scored[:50], indent=2, ensure_ascii=False)  # önizlemede ilk 50 satır
     if len(scored) > 50:
-        pretty += f"\n\n// NOTE: Showing first 50 items. Download full file below."
-    return summary, pretty, tmp.name
 with gr.Blocks(title="Q&A Quality Evaluator", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## Q&A Quality Evaluator\nUpload your JSON, score quality, and (optionally) auto-improve low items.")
     with gr.Row():
-        inp_file = gr.File(file_types=[".json"], label="Upload JSON (list of objects)")
     with gr.Row():
-        auto_switch = gr.Checkbox(label="Auto-improve low-quality items (light rules, no LLM)", value=False)
-        improve_sel = gr.Radio(choices=["low_only", "low_and_medium", "none"], value="low_only",
-                               label="Improve threshold")
-    run_btn = gr.Button("Score (and Improve)")
-    gr.Markdown("### Results")
-    with gr.Tab("Summary Table"):
-        out_table = gr.Dataframe(headers=["id", "quality_label", "quality_score", "question_preview"], wrap=True, height=400)
     with gr.Tab("Preview JSON"):
-        # Büyük bir pencere: lines=30 ile rahat görüntü
-        out_code = gr.Code(language="json", label="Preview (first 50 items)", interactive=False, lines=30)
     with gr.Tab("Download"):
-        out_file = gr.File(label="Download full scored JSON")
-    run_btn.click(
-        process_json,
-        inputs=[inp_file, auto_switch, improve_sel],
-        outputs=[out_table, out_code, out_file]
-    )
 if __name__ == "__main__":
     demo.launch()

 import json
 import tempfile
+from typing import List, Dict, Any, Tuple
 import gradio as gr
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
# ---------------------------
# Model init (compatible with HF Transformers)
# ---------------------------
MODEL_ID = "OpenAssistant/reward-model-deberta-v3-large-v2"

# Bind both status names up front so they always exist, whichever branch
# below runs; readers of LOAD_ERR no longer depend on the failure path
# having executed.
MODEL_READY = False
LOAD_ERR = ""

try:
    quality_pipe = pipeline(
        task="text-classification",
        model=MODEL_ID,
        tokenizer=MODEL_ID,
        return_all_scores=False,
        # Regression-style head: ask the pipeline to hand back the raw score.
        function_to_apply="none",
    )
except Exception as e:
    # Deliberate best-effort: a failed load must not stop the module from
    # importing. The error text is kept so it can be reported later, and
    # `quality_pipe` stays undefined in this case.
    LOAD_ERR = str(e)
else:
    MODEL_READY = True
def score_pair(question: str, answer: str) -> float:
    """Score one question/answer pair; a larger value means a better answer.

    When the model pipeline is available the pair is formatted as a
    ``Human: ... / Assistant: ...`` prompt and the pipeline's ``score`` field
    is returned as a float. When the model could not be loaded
    (``MODEL_READY`` is false) a small additive heuristic is used instead.

    Args:
        question: The question text. ``None`` is treated as an empty string
            and other non-string values are converted with ``str``.
        answer: The answer text, normalised the same way as ``question``.

    Returns:
        The model score, or the heuristic score (0.3 base plus bonuses).
    """
    # Fields parsed from JSON may be null or non-string; normalise them so
    # the string methods below cannot raise.
    question = "" if question is None else str(question)
    answer = "" if answer is None else str(answer)

    if not MODEL_READY:
        # Soft fallback when the model failed to load: a simple heuristic
        # based on the trailing question mark, answer length and punctuation.
        base = 0.3
        if question.strip().endswith("?"):
            base += 0.1
        if len(answer.split()) >= 6:
            base += 0.2
        if answer.strip().endswith((".", "!", "?")):
            base += 0.1
        return base

    text = f"Human: {question}\nAssistant: {answer}"
    # The first result is a dict carrying 'label' and 'score'; only the
    # score is used.
    out = quality_pipe(text, truncation=True)[0]
    return float(out["score"])
def label_from_score(
    score: float,
    *,
    high_threshold: float = 0.6,
    medium_threshold: float = 0.3,
) -> str:
    """Map a numeric quality score to ``"high"``, ``"medium"`` or ``"low"``.

    The useful score range of a reward model depends on the data it is
    applied to, so both cut-offs are adjustable. The defaults keep the
    original split: above 0.6 is high, above 0.3 is medium, anything else
    is low.

    Args:
        score: The score to classify.
        high_threshold: Scores strictly greater than this are ``"high"``.
        medium_threshold: Scores strictly greater than this (and not high)
            are ``"medium"``.

    Returns:
        One of ``"high"``, ``"medium"`` or ``"low"``.
    """
    # Both comparisons are strict: a score exactly on a threshold falls into
    # the lower bucket.
    if score > high_threshold:
        return "high"
    if score > medium_threshold:
        return "medium"
    return "low"
def improve_light(item: Dict[str, Any]) -> Dict[str, Any]:
    """Apply lightweight, rule-based clean-up to a Q&A record (no LLM).

    Rules applied:
      - question: strip whitespace, append ``?`` if missing, capitalise the
        first letter;
      - answer: strip whitespace, capitalise the first letter, append ``.``
        when it does not already end in ``.``, ``!`` or ``?``;
      - a non-empty answer shorter than six words gets one fixed sentence
        appended.

    Missing, null or empty fields come back as empty strings; an empty
    answer is left empty rather than being replaced by the filler sentence.

    Args:
        item: Record with optional ``question`` and ``answer`` keys. It is
            modified in place.

    Returns:
        The same ``item`` object, with ``question`` and ``answer`` rewritten.
    """
    # `or ""` folds None/empty values to "", and str() guards against
    # non-string JSON values such as numbers.
    q = str(item.get("question") or "").strip()
    a = str(item.get("answer") or "").strip()

    if q:
        if not q.endswith("?"):
            q += "?"
        if q[0].islower():
            q = q[0].upper() + q[1:]

    if a:
        if a[0].islower():
            a = a[0].upper() + a[1:]
        if a[-1] not in ".!?":
            a += "."
        # Pad only real answers: padding an empty one would invent an answer
        # out of nothing and leave a stray leading space.
        if len(a.split()) < 6:
            a = a + " This answer has been clarified for brevity and precision."

    item["question"] = q
    item["answer"] = a
    return item
 def process_json(
+    file, auto_improve: bool, improve_policy: str
+) -> Tuple[List[Dict[str, Any]], str, str, str]:
+    """
+    Girdi: JSON (list veya tek obje)
+    Çıktı:
+      - özet tablo (DataFrame)
+      - önizleme JSON (first 50 items)
+      - indirilebilir tam JSON path
+      - model yükleme uyarısı
+    """
     data = json.load(open(file.name))
     items: List[Dict[str, Any]] = data if isinstance(data, list) else [data]
+    scored = []
+    for idx, raw in enumerate(items):
+        item = dict(raw)
+        q = item.get("question", "")
+        a = item.get("answer", "")
+        # 1) ilk skor
+        s1 = score_pair(q, a)
+        lbl1 = label_from_score(s1)
+        item["quality"] = {"label": lbl1, "score": round(s1, 3)}
+        # 2) gerekiyorsa iyileştir
         def needs_improve(lbl: str) -> bool:
+            if improve_policy == "none":
+                return False
+            if improve_policy == "low_only":
                 return lbl == "low"
+            if improve_policy == "low_and_medium":
                 return lbl in ("low", "medium")
             return False
+        if auto_improve and needs_improve(lbl1):
+            item = improve_light(item)
+            # 3) tekrar skor
+            s2 = score_pair(item.get("question", ""), item.get("answer", ""))
+            lbl2 = label_from_score(s2)
+            item["quality_after"] = {"label": lbl2, "score": round(s2, 3)}
+        scored.append(item)
+    # Özet
     summary = []
+    for i, it in enumerate(scored):
+        qa = it.get("quality", {})
+        qa2 = it.get("quality_after")
         summary.append({
+            "id": it.get("id", i),
+            "label": qa.get("label"),
+            "score": qa.get("score"),
+            "label_after": qa2.get("label") if qa2 else None,
+            "score_after": qa2.get("score") if qa2 else None,
             "question_preview": (it.get("question") or "")[:120]
         })
+    # İndirilebilir dosya
+    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w", encoding="utf-8")
     json.dump(scored, tmp, indent=2, ensure_ascii=False)
+    tmp.flush(); tmp.close()
+    # Önizleme
+    preview = json.dumps(scored[:50], indent=2, ensure_ascii=False)
     if len(scored) > 50:
+        preview += "\n\n// NOTE: Showing first 50 items. Download full file below."
+    warn = ""
+    if not MODEL_READY:
+        warn = f"Warning: model '{MODEL_ID}' could not be loaded, used heuristic scoring. Error: {LOAD_ERR}"
+    return summary, preview, tmp.name, warn
 with gr.Blocks(title="Q&A Quality Evaluator", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("## Q&A Quality Evaluator\nUpload Q&A JSON, get model-based quality scores, and optionally auto-improve low items.")
     with gr.Row():
+        inp = gr.File(file_types=[".json"], label="Upload JSON (list of objects)")
     with gr.Row():
+        auto = gr.Checkbox(value=False, label="Auto-improve low items (rule-based, no LLM)")
+        policy = gr.Radio(choices=["low_only", "low_and_medium", "none"], value="low_only", label="Improve threshold")
+    run = gr.Button("Run")
+    with gr.Tab("Summary"):
+        tbl = gr.Dataframe(headers=["id","label","score","label_after","score_after","question_preview"],
+                           wrap=True, height=460)
     with gr.Tab("Preview JSON"):
+        code = gr.Code(language="json", lines=34, label="Preview (first 50 items)")
     with gr.Tab("Download"):
+        dfile = gr.File(label="Download full JSON")
+    warnbox = gr.Markdown("")
+    run.click(process_json, inputs=[inp, auto, policy], outputs=[tbl, code, dfile, warnbox])
 if __name__ == "__main__":
     demo.launch()