korayaggul committed
Commit 0217536 · verified · 1 Parent(s): f509ff5

Update app.py

Files changed (1): app.py (+145, -101)
app.py CHANGED
@@ -1,143 +1,187 @@
  import json
  import tempfile
- from typing import List, Dict, Any

  import gradio as gr
- from transformers import pipeline
-
- # --- Lazy init: load the model once when the Space starts
- quality_clf = pipeline("text-classification", model="snorkelai/instruction-response-quality")
-
- def score_item(item: Dict[str, Any]) -> Dict[str, Any]:
-     """Score a single QA record and add a quality field."""
-     q = item.get("question", "")
-     a = item.get("answer", "")
-     text = f"Q: {q}\nA: {a}"
-     pred = quality_clf(text, truncation=True)[0]
-     score = float(pred["score"])
-     if score > 0.75:
-         label = "high"
-     elif score > 0.40:
-         label = "medium"
-     else:
-         label = "low"
-     item["quality"] = {"label": label, "score": round(score, 3)}
-     return item

- def improve_item(item: Dict[str, Any], target: str = "medium") -> Dict[str, Any]:
      """
-     Simple rule-based improvement for low scores (no LLM; lightweight and free).
-     An instruct LLM could be plugged in here later if desired.
      """
-     label = item.get("quality", {}).get("label")
-     if label in ("high",) or target == "none":
-         return item
-
-     q = item.get("question", "")
-     a = item.get("answer", "")
-
-     # Simple cleanups: whitespace, capitalization, punctuation
-     q2 = q.strip()
-     if q2 and q2[-1] not in "?":
-         q2 += "?"
-     if q2 and q2[0].islower():
-         q2 = q2[0].upper() + q2[1:]
-
-     a2 = a.strip()
-     if a2 and a2[0].islower():
-         a2 = a2[0].upper() + a2[1:]
-     if a2 and a2[-1] not in ".!?":
-         a2 += "."
-
-     # Minimally expand very short answers
-     if len(a2.split()) < 5:
-         a2 = a2 + " This answer has been clarified for brevity and precision."
-
-     item["question"] = q2
-     item["answer"] = a2
      return item

  def process_json(
-     file,
-     auto_improve: bool,
-     improve_threshold: str
- ):
-     # Load the JSON content (supports a list or a single object)
      data = json.load(open(file.name))
      items: List[Dict[str, Any]] = data if isinstance(data, list) else [data]

-     # Score
-     scored: List[Dict[str, Any]] = [score_item(dict(it)) for it in items]

-     # Improvement is optional
-     if auto_improve:
          def needs_improve(lbl: str) -> bool:
-             if improve_threshold == "low_only":
                  return lbl == "low"
-             elif improve_threshold == "low_and_medium":
                  return lbl in ("low", "medium")
              return False

-         improved = []
-         for it in scored:
-             lbl = it.get("quality", {}).get("label", "low")
-             if needs_improve(lbl):
-                 it = improve_item(it)
-                 # re-score so the difference is visible
-                 it = score_item(it)
-             improved.append(it)
-         scored = improved
-
-     # A small view for the summary table (index if there is no id)
      summary = []
-     for idx, it in enumerate(scored):
          summary.append({
-             "id": it.get("id", idx),
-             "quality_label": it["quality"]["label"],
-             "quality_score": it["quality"]["score"],
              "question_preview": (it.get("question") or "")[:120]
          })

-     # Build the downloadable JSON
-     tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w")
      json.dump(scored, tmp, indent=2, ensure_ascii=False)
-     tmp.flush()
-     tmp.close()

-     # Code box so large JSON is easy to read
-     pretty = json.dumps(scored[:50], indent=2, ensure_ascii=False)  # first 50 items in the preview
      if len(scored) > 50:
-         pretty += f"\n\n// NOTE: Showing first 50 items. Download full file below."

-     return summary, pretty, tmp.name

  with gr.Blocks(title="Q&A Quality Evaluator", theme=gr.themes.Soft()) as demo:
-     gr.Markdown("## Q&A Quality Evaluator\nUpload your JSON, score quality, and (optionally) auto-improve low items.")

      with gr.Row():
-         inp_file = gr.File(file_types=[".json"], label="Upload JSON (list of objects)")

      with gr.Row():
-         auto_switch = gr.Checkbox(label="Auto-improve low-quality items (light rules, no LLM)", value=False)
-         improve_sel = gr.Radio(choices=["low_only", "low_and_medium", "none"], value="low_only",
-                                label="Improve threshold")

-     run_btn = gr.Button("Score (and Improve)")

-     gr.Markdown("### Results")
-     with gr.Tab("Summary Table"):
-         out_table = gr.Dataframe(headers=["id", "quality_label", "quality_score", "question_preview"], wrap=True, height=400)
      with gr.Tab("Preview JSON"):
-         # A large window: comfortable to read with lines=30
-         out_code = gr.Code(language="json", label="Preview (first 50 items)", interactive=False, lines=30)
      with gr.Tab("Download"):
-         out_file = gr.File(label="Download full scored JSON")

-     run_btn.click(
-         process_json,
-         inputs=[inp_file, auto_switch, improve_sel],
-         outputs=[out_table, out_code, out_file]
-     )

  if __name__ == "__main__":
      demo.launch()
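
For orientation before the new version below: a minimal sketch of the record shape the removed score_item() path attached to each item under its 0.75 / 0.40 cutoffs. The record and the classifier score are illustrative, not taken from the commit.

# Hypothetical input record (illustrative only).
record = {"id": 1, "question": "what is gradio", "answer": "a ui library"}

# What score_item(record) produced in the old version, assuming the
# snorkelai classifier returned 0.62 for "Q: ...\nA: ...":
# 0.40 < 0.62 <= 0.75, so the label is "medium".
scored_record = {
    "id": 1,
    "question": "what is gradio",
    "answer": "a ui library",
    "quality": {"label": "medium", "score": 0.62},
}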
 
  import json
  import tempfile
+ from typing import List, Dict, Any, Tuple

  import gradio as gr
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
+
+ # ---------------------------
+ # Model init (compatible with HF Transformers)
+ # ---------------------------
+ MODEL_ID = "OpenAssistant/reward-model-deberta-v3-large-v2"
+
+ try:
+     quality_pipe = pipeline(
+         task="text-classification",
+         model=MODEL_ID,
+         tokenizer=MODEL_ID,
+         return_all_scores=False,
+         function_to_apply="none"  # regression; return the raw score from the pipeline
+     )
+     MODEL_READY = True
+ except Exception as e:
+     MODEL_READY = False
+     LOAD_ERR = str(e)
+
+ def score_pair(question: str, answer: str) -> float:
+     """
+     Feed the reward model its prompt-response format.
+     The model returns a regression score (higher = better).
+     """
+     if not MODEL_READY:
+         # Soft fallback if the model could not be loaded: a simple heuristic score
+         # (length, punctuation, question mark, etc.)
+         base = 0.3
+         if question.strip().endswith("?"):
+             base += 0.1
+         if len(answer.split()) >= 6:
+             base += 0.2
+         if answer.strip().endswith((".", "!", "?")):
+             base += 0.1
+         return base
+
+     text = f"Human: {question}\nAssistant: {answer}"
+     out = quality_pipe(text, truncation=True)[0]
+     # out = {'label': 'LABEL_0', 'score': <float>} -> regression score
+     return float(out["score"])
+
+ def label_from_score(score: float) -> str:
+     """
+     Thresholds: for reward models the range can vary with the dataset.
+     In practice these cutoffs work well:
+     >0.6 -> high, 0.3-0.6 -> medium, else -> low
+     """
+     if score > 0.6:
+         return "high"
+     if score > 0.3:
+         return "medium"
+     return "low"

+ def improve_light(item: Dict[str, Any]) -> Dict[str, Any]:
      """
+     Light improvement without an LLM (free / CPU-only):
+     - append '?' to the question and capitalize its first letter
+     - capitalize the answer's first letter and add final punctuation
+     - if the answer is very short, clarify it with one extra sentence
      """
+     q = (item.get("question") or "").strip()
+     a = (item.get("answer") or "").strip()
+
+     if q:
+         if not q.endswith("?"):
+             q += "?"
+         if q[0].islower():
+             q = q[0].upper() + q[1:]
+
+     if a:
+         if a[0].islower():
+             a = a[0].upper() + a[1:]
+         if a[-1] not in ".!?":
+             a += "."
+
+     if len(a.split()) < 6:
+         a = a + " This answer has been clarified for brevity and precision."
+
+     item["question"] = q
+     item["answer"] = a
      return item

  def process_json(
+     file, auto_improve: bool, improve_policy: str
+ ) -> Tuple[List[Dict[str, Any]], str, str, str]:
+     """
+     Input: JSON (a list or a single object)
+     Output:
+     - summary table (DataFrame)
+     - preview JSON (first 50 items)
+     - path to the downloadable full JSON
+     - model-loading warning
+     """
      data = json.load(open(file.name))
      items: List[Dict[str, Any]] = data if isinstance(data, list) else [data]

+     scored = []
+     for idx, raw in enumerate(items):
+         item = dict(raw)
+         q = item.get("question", "")
+         a = item.get("answer", "")

+         # 1) initial score
+         s1 = score_pair(q, a)
+         lbl1 = label_from_score(s1)
+         item["quality"] = {"label": lbl1, "score": round(s1, 3)}
+
+         # 2) improve if needed
          def needs_improve(lbl: str) -> bool:
+             if improve_policy == "none":
+                 return False
+             if improve_policy == "low_only":
                  return lbl == "low"
+             if improve_policy == "low_and_medium":
                  return lbl in ("low", "medium")
              return False

+         if auto_improve and needs_improve(lbl1):
+             item = improve_light(item)
+             # 3) re-score
+             s2 = score_pair(item.get("question", ""), item.get("answer", ""))
+             lbl2 = label_from_score(s2)
+             item["quality_after"] = {"label": lbl2, "score": round(s2, 3)}
+
+         scored.append(item)
+
+     # Summary
      summary = []
+     for i, it in enumerate(scored):
+         qa = it.get("quality", {})
+         qa2 = it.get("quality_after")
          summary.append({
+             "id": it.get("id", i),
+             "label": qa.get("label"),
+             "score": qa.get("score"),
+             "label_after": qa2.get("label") if qa2 else None,
+             "score_after": qa2.get("score") if qa2 else None,
              "question_preview": (it.get("question") or "")[:120]
          })

+     # Downloadable file
+     tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w", encoding="utf-8")
      json.dump(scored, tmp, indent=2, ensure_ascii=False)
+     tmp.flush(); tmp.close()

+     # Preview
+     preview = json.dumps(scored[:50], indent=2, ensure_ascii=False)
      if len(scored) > 50:
+         preview += "\n\n// NOTE: Showing first 50 items. Download full file below."
+
+     warn = ""
+     if not MODEL_READY:
+         warn = f"Warning: model '{MODEL_ID}' could not be loaded, used heuristic scoring. Error: {LOAD_ERR}"

+     return summary, preview, tmp.name, warn

  with gr.Blocks(title="Q&A Quality Evaluator", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("## Q&A Quality Evaluator\nUpload Q&A JSON, get model-based quality scores, and optionally auto-improve low items.")

      with gr.Row():
+         inp = gr.File(file_types=[".json"], label="Upload JSON (list of objects)")

      with gr.Row():
+         auto = gr.Checkbox(value=False, label="Auto-improve low items (rule-based, no LLM)")
+         policy = gr.Radio(choices=["low_only", "low_and_medium", "none"], value="low_only", label="Improve threshold")

+     run = gr.Button("Run")

+     with gr.Tab("Summary"):
+         tbl = gr.Dataframe(headers=["id", "label", "score", "label_after", "score_after", "question_preview"],
+                            wrap=True, height=460)
      with gr.Tab("Preview JSON"):
+         code = gr.Code(language="json", lines=34, label="Preview (first 50 items)")
      with gr.Tab("Download"):
+         dfile = gr.File(label="Download full JSON")
+         warnbox = gr.Markdown("")

+     run.click(process_json, inputs=[inp, auto, policy], outputs=[tbl, code, dfile, warnbox])

  if __name__ == "__main__":
      demo.launch()
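
A minimal local smoke test of the new process_json(), assuming the input JSON is a list of objects with question / answer (and optionally id) keys. The sample records are made up, and SimpleNamespace stands in for the object Gradio passes to the handler, which is enough here because process_json only reads its .name attribute.

import json, tempfile
from types import SimpleNamespace
# from app import process_json  # assuming app.py from this commit is importable

sample = [
    {"id": 1, "question": "what is gradio", "answer": "a ui library"},
    {"id": 2, "question": "Define overfitting?",
     "answer": "When a model memorizes the training data and generalizes poorly to new data."},
]

# Write the sample to disk the way an upload would arrive.
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False, encoding="utf-8") as f:
    json.dump(sample, f)
    upload_path = f.name

# process_json only reads file.name, so a SimpleNamespace is enough for a local test.
summary, preview, out_path, warn = process_json(
    SimpleNamespace(name=upload_path), auto_improve=True, improve_policy="low_only"
)
print(summary)           # per-item labels/scores; *_after fields stay None unless an item was improved
print(out_path, warn)    # path to the full scored JSON and any model-loading warning

If the reward model cannot be downloaded in the test environment, MODEL_READY stays False, score_pair() falls back to its heuristic, and warn explains why.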