llm_moderation_testing

Running

App Files Files Community

Yacine Jernite committed 12 days ago

Commit

06103c4

1 Parent(s): 338aa78

load dataset

Browse files

Files changed (5) hide show

README.md +3 -0
app.py +126 -1
requirements.txt +1 -0
ui/tab_policy.py +11 -0
ui/tab_testing.py +6 -0

README.md CHANGED Viewed

@@ -11,6 +11,9 @@ license: apache-2.0
 short_description: A model to test different models assessing content policies
 hf_oauth: true
 hf_oauth_scopes:
  - inference-api
 ---

 short_description: A model to test different models assessing content policies
 hf_oauth: true
 hf_oauth_scopes:
+ - read-repos
+ - write-repos
+ - manage-repos
  - inference-api
 ---

app.py CHANGED Viewed

@@ -7,10 +7,14 @@ import gradio as gr
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 from utils.helpers import get_hf_token
 from utils.model_interface import extract_model_id, run_test
 from ui.sidebar import build_sidebar
 from ui.tab_config import build_config_tab
 from ui.tab_policy import build_policy_tab
 from ui.tab_testing import (
     build_testing_tab,
@@ -24,7 +28,7 @@ from ui.tab_testing import (
 # Handlers
 # ============================================================================
-def handle_run_test(test_input, current_policy, model_choice, reasoning_effort, max_tokens, temperature, top_p, system_prompt_val, response_format_val, oauth_token: gr.OAuthToken | None = None):
     """Handle test execution."""
     if not test_input or not test_input.strip():
@@ -60,6 +64,33 @@ def handle_run_test(test_input, current_policy, model_choice, reasoning_effort,
     model_info = format_model_info(model_choice, reasoning_effort)
     reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_choice, reasoning)
     return (
         model_info,
         label_text,
@@ -92,6 +123,7 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
                 testing_components = build_testing_tab()
                 test_input = testing_components["test_input"]
                 run_test_btn = testing_components["run_test_btn"]
                 model_info_display = testing_components["model_info_display"]
                 label_display = testing_components["label_display"]
                 categories_display = testing_components["categories_display"]
@@ -111,6 +143,11 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
                 system_prompt_textbox = config_components["system_prompt_textbox"]
                 response_format_textbox = config_components["response_format_textbox"]
     # ============================================================================
     # Event Handlers
     # ============================================================================
@@ -128,6 +165,7 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
             top_p,
             system_prompt_textbox,
             response_format_textbox,
         ],
         outputs=[
             model_info_display,
@@ -150,6 +188,93 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
         inputs=[model_dropdown, reasoning_effort],
         outputs=model_info_display,
     )
 if __name__ == "__main__":

 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from datetime import datetime
+from utils.dataset import format_categories_and_reasoning, save_to_dataset
 from utils.helpers import get_hf_token
 from utils.model_interface import extract_model_id, run_test
 from ui.sidebar import build_sidebar
 from ui.tab_config import build_config_tab
+from ui.tab_dataset import build_dataset_tab
 from ui.tab_policy import build_policy_tab
 from ui.tab_testing import (
     build_testing_tab,
 # Handlers
 # ============================================================================
+def handle_run_test(test_input, current_policy, model_choice, reasoning_effort, max_tokens, temperature, top_p, system_prompt_val, response_format_val, save_mode, oauth_token: gr.OAuthToken | None = None):
     """Handle test execution."""
     if not test_input or not test_input.strip():
     model_info = format_model_info(model_choice, reasoning_effort)
     reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_choice, reasoning)
+    # Save to dataset if enabled
+    if save_mode == "Save to Dataset" and hf_token is not None:
+        try:
+            categories_and_reasoning_text = format_categories_and_reasoning(parsed)
+            policy_violation = parsed.get("label", -1)
+            data = {
+                "input": test_input,
+                "policy_violation": policy_violation,
+                "categories_and_reasoning": categories_and_reasoning_text,
+                "policy": current_policy,
+                "model_selection": model_choice,
+                "raw_response": raw_response,
+                "reasoning_trace": reasoning or "",
+                "reasoning_effort": reasoning_effort or "",
+                "max_tokens": int(max_tokens),
+                "temperature": float(temperature),
+                "top_p": float(top_p),
+                "system_prompt": system_prompt_val or "",
+                "response_format": response_format_val or "",
+                "timestamp": datetime.now().isoformat(),
+            }
+            save_to_dataset(hf_token, data)
+        except Exception as e:
+            # Log error but don't break test execution
+            print(f"Failed to save to dataset: {e}")
     return (
         model_info,
         label_text,
                 testing_components = build_testing_tab()
                 test_input = testing_components["test_input"]
                 run_test_btn = testing_components["run_test_btn"]
+                save_mode = testing_components["save_mode"]
                 model_info_display = testing_components["model_info_display"]
                 label_display = testing_components["label_display"]
                 categories_display = testing_components["categories_display"]
                 system_prompt_textbox = config_components["system_prompt_textbox"]
                 response_format_textbox = config_components["response_format_textbox"]
+                dataset_components = build_dataset_tab()
+                example_dropdown = dataset_components["example_dropdown"]
+                cached_examples = dataset_components["cached_examples"]
+                dropdown_choices_state = dataset_components["dropdown_choices_state"]
     # ============================================================================
     # Event Handlers
     # ============================================================================
             top_p,
             system_prompt_textbox,
             response_format_textbox,
+            save_mode,
         ],
         outputs=[
             model_info_display,
         inputs=[model_dropdown, reasoning_effort],
         outputs=model_info_display,
     )
+    # Dataset load handler
+    def load_example_from_dataset(selected_label, cached_examples_list, dropdown_choices_list):
+        """Load a saved example and populate the input fields and result displays.
+
+        Args:
+            selected_label: Label currently selected in the example dropdown.
+            cached_examples_list: Saved example records (read with ``.get``);
+                the record for a label sits at the same position as that
+                label in ``dropdown_choices_list``.
+            dropdown_choices_list: Dropdown labels, searched for
+                ``selected_label`` to find the record index.
+
+        Returns:
+            A 15-tuple: nine input/config values (input text, policy, model,
+            reasoning effort, max tokens, temperature, top_p, system prompt,
+            response format) followed by six result values (model info,
+            label text, categories/reasoning, raw response, and two
+            ``gr.update`` objects for the reasoning info and reasoning trace).
+            When no example can be resolved, every element is a no-op
+            ``gr.update()`` so the current UI contents are left untouched.
+        """
+        # Returning None would set each output's value to None (clearing the
+        # form and resetting the policy state); an argument-less gr.update()
+        # leaves the component as it is.
+        no_change = tuple(gr.update() for _ in range(15))
+        if not (cached_examples_list and selected_label and dropdown_choices_list):
+            return no_change
+        try:
+            # list.index raises ValueError for an unknown label, and indexing
+            # raises IndexError when the cache is shorter than the label list;
+            # both are handled below. The try deliberately spans the formatter
+            # calls too, as the original handler did.
+            idx = dropdown_choices_list.index(selected_label)
+            example = cached_examples_list[idx]
+            # Get policy - ensure it's a string (not None)
+            policy = example.get("policy", "") or ""
+            # Extract saved results
+            policy_violation = example.get("policy_violation", -1)
+            categories_and_reasoning = example.get("categories_and_reasoning", "")
+            raw_response = example.get("raw_response", "")
+            reasoning_trace = example.get("reasoning_trace", "")
+            model_selection = example.get("model_selection", "")
+            reasoning_effort_val = example.get("reasoning_effort", "")
+            # Format label text: 1 = violation, 0 = no violation, anything else = unknown
+            if policy_violation == 1:
+                label_text = "## ❌ Policy Violation Detected"
+            elif policy_violation == 0:
+                label_text = "## ✅ No Policy Violation"
+            else:
+                label_text = "## ⚠️ Unable to determine label"
+            # Format model info
+            model_info = format_model_info(model_selection, reasoning_effort_val)
+            # Format reasoning info
+            reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_selection, reasoning_trace)
+            # Only show the trace panel when there is non-blank trace text
+            reasoning_visible = bool(reasoning_trace and reasoning_trace.strip())
+            return (
+                example.get("input", ""),
+                policy,  # current_policy_state - UI syncs automatically via change handler
+                model_selection,
+                reasoning_effort_val,
+                example.get("max_tokens", 0),
+                example.get("temperature", 0.0),
+                example.get("top_p", 0.0),
+                example.get("system_prompt", ""),
+                example.get("response_format", ""),
+                # Results
+                model_info,
+                label_text,
+                categories_and_reasoning,
+                raw_response,
+                gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
+                gr.update(value=reasoning_trace or "", visible=reasoning_visible),
+            )
+        except (ValueError, IndexError):
+            return no_change
+    example_dropdown.change(
+        load_example_from_dataset,
+        inputs=[example_dropdown, cached_examples, dropdown_choices_state],
+        outputs=[
+            test_input,
+            current_policy_state,  # UI components sync automatically via change handler
+            model_dropdown,
+            reasoning_effort,
+            max_tokens,
+            temperature,
+            top_p,
+            system_prompt_textbox,
+            response_format_textbox,
+            # Results
+            model_info_display,
+            label_display,
+            categories_display,
+            model_response_display,
+            reasoning_info,
+            reasoning_display,
+        ],
+    )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -3,3 +3,4 @@ openai
 gradio
 python-dotenv
 huggingface-hub

 gradio
 python-dotenv
 huggingface-hub
+datasets

ui/tab_policy.py CHANGED Viewed

@@ -81,6 +81,17 @@ def build_policy_tab(base_dir: str) -> dict:
             lambda: ("", "", "*No policy loaded*"),
             outputs=[current_policy_state, manual_text, policy_preview],
         )
     return {
         "current_policy_state": current_policy_state,

             lambda: ("", "", "*No policy loaded*"),
             outputs=[current_policy_state, manual_text, policy_preview],
         )
+        # Sync UI components when state changes externally (e.g., from dataset load)
+        def sync_policy_ui(policy_text):
+            """Return the values used to mirror the policy state into the UI.
+
+            Returns a pair: the policy text unchanged, and the preview text,
+            which falls back to a placeholder when the policy is empty or None.
+            """
+            if policy_text:
+                return policy_text, policy_text
+            return policy_text, "*No policy loaded*"
+        current_policy_state.change(
+            sync_policy_ui,
+            inputs=current_policy_state,
+            outputs=[manual_text, policy_preview],
+        )
     return {
         "current_policy_state": current_policy_state,

ui/tab_testing.py CHANGED Viewed

@@ -136,6 +136,11 @@ def build_testing_tab() -> dict:
                     example_dropdown = gr.Dropdown(label="Load Example", choices=list(TEST_EXAMPLES.keys()))
                     load_example_btn = gr.Button("Load Example", variant="secondary")
                 run_test_btn = gr.Button("Run Test", variant="primary")
                 # Initialize with default model info
                 initial_model = f"{MODELS[0]['name']} ({MODELS[0]['id']})"
                 initial_info_lines = [
@@ -170,6 +175,7 @@ def build_testing_tab() -> dict:
         "example_dropdown": example_dropdown,
         "load_example_btn": load_example_btn,
         "run_test_btn": run_test_btn,
         "model_info_display": model_info_display,
         "label_display": label_display,
         "categories_display": categories_display,

                     example_dropdown = gr.Dropdown(label="Load Example", choices=list(TEST_EXAMPLES.keys()))
                     load_example_btn = gr.Button("Load Example", variant="secondary")
                 run_test_btn = gr.Button("Run Test", variant="primary")
+                save_mode = gr.Radio(
+                    label="Save to Dataset",
+                    choices=["Don't Save", "Save to Dataset"],
+                    value="Don't Save"
+                )
                 # Initialize with default model info
                 initial_model = f"{MODELS[0]['name']} ({MODELS[0]['id']})"
                 initial_info_lines = [
         "example_dropdown": example_dropdown,
         "load_example_btn": load_example_btn,
         "run_test_btn": run_test_btn,
+        "save_mode": save_mode,
         "model_info_display": model_info_display,
         "label_display": label_display,
         "categories_display": categories_display,