Commit 06103c4 · Yacine Jernite committed
Parent: 338aa78

load dataset

Files changed (5):
  1. README.md +3 -0
  2. app.py +126 -1
  3. requirements.txt +1 -0
  4. ui/tab_policy.py +11 -0
  5. ui/tab_testing.py +6 -0
README.md CHANGED
@@ -11,6 +11,9 @@ license: apache-2.0
 short_description: A model to test different models assessing content policies
 hf_oauth: true
 hf_oauth_scopes:
+ - read-repos
+ - write-repos
+ - manage-repos
  - inference-api
 ---
 
app.py CHANGED
@@ -7,10 +7,14 @@ import gradio as gr
 
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 
+from datetime import datetime
+
+from utils.dataset import format_categories_and_reasoning, save_to_dataset
 from utils.helpers import get_hf_token
 from utils.model_interface import extract_model_id, run_test
 from ui.sidebar import build_sidebar
 from ui.tab_config import build_config_tab
+from ui.tab_dataset import build_dataset_tab
 from ui.tab_policy import build_policy_tab
 from ui.tab_testing import (
     build_testing_tab,
@@ -24,7 +28,7 @@ from ui.tab_testing import (
 # Handlers
 # ============================================================================
 
-def handle_run_test(test_input, current_policy, model_choice, reasoning_effort, max_tokens, temperature, top_p, system_prompt_val, response_format_val, oauth_token: gr.OAuthToken | None = None):
+def handle_run_test(test_input, current_policy, model_choice, reasoning_effort, max_tokens, temperature, top_p, system_prompt_val, response_format_val, save_mode, oauth_token: gr.OAuthToken | None = None):
     """Handle test execution."""
 
     if not test_input or not test_input.strip():
@@ -60,6 +64,33 @@ def handle_run_test(test_input, current_policy, model_choice, reasoning_effort,
     model_info = format_model_info(model_choice, reasoning_effort)
     reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_choice, reasoning)
 
+    # Save to dataset if enabled
+    if save_mode == "Save to Dataset" and hf_token is not None:
+        try:
+            categories_and_reasoning_text = format_categories_and_reasoning(parsed)
+            policy_violation = parsed.get("label", -1)
+
+            data = {
+                "input": test_input,
+                "policy_violation": policy_violation,
+                "categories_and_reasoning": categories_and_reasoning_text,
+                "policy": current_policy,
+                "model_selection": model_choice,
+                "raw_response": raw_response,
+                "reasoning_trace": reasoning or "",
+                "reasoning_effort": reasoning_effort or "",
+                "max_tokens": int(max_tokens),
+                "temperature": float(temperature),
+                "top_p": float(top_p),
+                "system_prompt": system_prompt_val or "",
+                "response_format": response_format_val or "",
+                "timestamp": datetime.now().isoformat(),
+            }
+            save_to_dataset(hf_token, data)
+        except Exception as e:
+            # Log error but don't break test execution
+            print(f"Failed to save to dataset: {e}")
+
     return (
         model_info,
         label_text,
@@ -92,6 +123,7 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
     testing_components = build_testing_tab()
     test_input = testing_components["test_input"]
     run_test_btn = testing_components["run_test_btn"]
+    save_mode = testing_components["save_mode"]
     model_info_display = testing_components["model_info_display"]
     label_display = testing_components["label_display"]
     categories_display = testing_components["categories_display"]
@@ -111,6 +143,11 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
     system_prompt_textbox = config_components["system_prompt_textbox"]
    response_format_textbox = config_components["response_format_textbox"]
 
+    dataset_components = build_dataset_tab()
+    example_dropdown = dataset_components["example_dropdown"]
+    cached_examples = dataset_components["cached_examples"]
+    dropdown_choices_state = dataset_components["dropdown_choices_state"]
+
     # ============================================================================
     # Event Handlers
     # ============================================================================
@@ -128,6 +165,7 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
             top_p,
             system_prompt_textbox,
             response_format_textbox,
+            save_mode,
         ],
         outputs=[
             model_info_display,
@@ -150,6 +188,93 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
         inputs=[model_dropdown, reasoning_effort],
         outputs=model_info_display,
     )
+
+    # Dataset load handler
+    def load_example_from_dataset(selected_label, cached_examples_list, dropdown_choices_list):
+        """Load example from dataset and populate all fields."""
+        if (not cached_examples_list or not selected_label or
+                not dropdown_choices_list or selected_label not in dropdown_choices_list):
+            # Return None to skip updates
+            return None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
+
+        try:
+            # Find index by matching label
+            idx = dropdown_choices_list.index(selected_label)
+            if idx < 0 or idx >= len(cached_examples_list):
+                return None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
+
+            example = cached_examples_list[idx]
+
+            # Get policy - ensure it's a string (not None)
+            policy = example.get("policy", "") or ""
+
+            # Extract saved results
+            policy_violation = example.get("policy_violation", -1)
+            categories_and_reasoning = example.get("categories_and_reasoning", "")
+            raw_response = example.get("raw_response", "")
+            reasoning_trace = example.get("reasoning_trace", "")
+            model_selection = example.get("model_selection", "")
+            reasoning_effort_val = example.get("reasoning_effort", "")
+
+            # Format label text
+            if policy_violation == 1:
+                label_text = "## ❌ Policy Violation Detected"
+            elif policy_violation == 0:
+                label_text = "## ✅ No Policy Violation"
+            else:
+                label_text = "## ⚠️ Unable to determine label"
+
+            # Format model info
+            model_info = format_model_info(model_selection, reasoning_effort_val)
+
+            # Format reasoning info
+            reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_selection, reasoning_trace)
+
+            reasoning_visible = bool(reasoning_trace and reasoning_trace.strip())
+
+            return (
+                example.get("input", ""),
+                policy,  # current_policy_state - UI syncs automatically via change handler
+                example.get("model_selection", ""),
+                example.get("reasoning_effort", ""),
+                example.get("max_tokens", 0),
+                example.get("temperature", 0.0),
+                example.get("top_p", 0.0),
+                example.get("system_prompt", ""),
+                example.get("response_format", ""),
+                # Results
+                model_info,
+                label_text,
+                categories_and_reasoning,
+                raw_response,
+                gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
+                gr.update(value=reasoning_trace or "", visible=reasoning_visible),
+            )
+        except (ValueError, IndexError):
+            return None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
+
+    example_dropdown.change(
+        load_example_from_dataset,
+        inputs=[example_dropdown, cached_examples, dropdown_choices_state],
+        outputs=[
+            test_input,
+            current_policy_state,  # UI components sync automatically via change handler
+            model_dropdown,
+            reasoning_effort,
+            max_tokens,
+            temperature,
+            top_p,
+            system_prompt_textbox,
+            response_format_textbox,
+            # Results
+            model_info_display,
+            label_display,
+            categories_display,
+            model_response_display,
+            reasoning_info,
+            reasoning_display,
+        ],
+    )
 
 
 if __name__ == "__main__":
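
Note: app.py now imports format_categories_and_reasoning and save_to_dataset from a new utils/dataset module that is not part of this diff. A minimal sketch of what that module might look like, assuming the datasets dependency added in requirements.txt below; the repo id and the category formatting are placeholder assumptions, not the actual implementation:

# Hypothetical sketch of utils/dataset.py (file not shown in this commit).
from datasets import Dataset, load_dataset

# Placeholder: the real destination repo id is not visible in this diff.
DATASET_REPO_ID = "your-org/moderation-test-results"


def format_categories_and_reasoning(parsed: dict) -> str:
    """Flatten the parsed model output into a readable Markdown list."""
    categories = parsed.get("categories", {}) or {}
    return "\n".join(f"- **{name}**: {details}" for name, details in categories.items())


def save_to_dataset(hf_token: str, data: dict) -> None:
    """Append one test record to the Hub dataset and push the result back."""
    try:
        # Pull the current split so the new row is appended rather than overwriting.
        existing = load_dataset(DATASET_REPO_ID, split="train", token=hf_token)
        updated = existing.add_item(data)
    except Exception:
        # First save: no dataset exists yet, so start from this single record.
        updated = Dataset.from_list([data])
    updated.push_to_hub(DATASET_REPO_ID, token=hf_token)

Re-pushing the whole split on every save keeps the sketch simple, but it is racy if two users save at once; a production version would more likely use huggingface_hub's CommitScheduler or per-record files.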
requirements.txt CHANGED
@@ -3,3 +3,4 @@ openai
 gradio
 python-dotenv
 huggingface-hub
+datasets
ui/tab_policy.py CHANGED
@@ -81,6 +81,17 @@ def build_policy_tab(base_dir: str) -> dict:
         lambda: ("", "", "*No policy loaded*"),
         outputs=[current_policy_state, manual_text, policy_preview],
     )
+
+    # Sync UI components when state changes externally (e.g., from dataset load)
+    def sync_policy_ui(policy_text):
+        preview_text = policy_text if policy_text else "*No policy loaded*"
+        return policy_text, preview_text
+
+    current_policy_state.change(
+        sync_policy_ui,
+        inputs=current_policy_state,
+        outputs=[manual_text, policy_preview],
+    )
 
     return {
         "current_policy_state": current_policy_state,
ui/tab_testing.py CHANGED
@@ -136,6 +136,11 @@ def build_testing_tab() -> dict:
     example_dropdown = gr.Dropdown(label="Load Example", choices=list(TEST_EXAMPLES.keys()))
     load_example_btn = gr.Button("Load Example", variant="secondary")
     run_test_btn = gr.Button("Run Test", variant="primary")
+    save_mode = gr.Radio(
+        label="Save to Dataset",
+        choices=["Don't Save", "Save to Dataset"],
+        value="Don't Save"
+    )
     # Initialize with default model info
     initial_model = f"{MODELS[0]['name']} ({MODELS[0]['id']})"
     initial_info_lines = [
@@ -170,6 +175,7 @@ def build_testing_tab() -> dict:
         "example_dropdown": example_dropdown,
         "load_example_btn": load_example_btn,
         "run_test_btn": run_test_btn,
+        "save_mode": save_mode,
         "model_info_display": model_info_display,
         "label_display": label_display,
         "categories_display": categories_display,