llm_moderation_testing

Running

App Files Files Community

Yacine Jernite committed 2 days ago

Commit

c90d7c6

1 Parent(s): 463d65e

native error handling

Browse files

Files changed (5) hide show

app.py +38 -25
ui/tab_dataset.py +9 -4
ui/tab_policy.py +56 -11
utils/helpers.py +11 -4
utils/model_interface.py +25 -10

app.py CHANGED Viewed

@@ -69,34 +69,37 @@ def handle_run_test(test_input, current_policy, model_choice, reasoning_effort,
     """Handle test execution."""
     if not test_input or not test_input.strip():
-        model_info = format_model_info(model_choice, reasoning_effort)
-        return model_info, "*Please enter test content*", "*No content*", "*No response yet*", gr.update(value="", visible=False), gr.update(value="", visible=False)
     if not current_policy or current_policy == "*No policy loaded*":
-        model_info = format_model_info(model_choice, reasoning_effort)
-        return model_info, "*Please load a policy first*", "*No policy*", "*No response yet*", gr.update(value="", visible=False), gr.update(value="", visible=False)
     # OAuth token is automatically injected by Gradio - we don't pass login_button as input
     # Use inference token (org preferred, falls back to personal)
     hf_token, _ = get_inference_token(oauth_token)
     if hf_token is None:
-        model_info = format_model_info(model_choice, reasoning_effort)
-        return model_info, "*Please log in or set tokens to use Inference Providers*", "*Authentication required*", "*No response yet*", gr.update(value="", visible=False), gr.update(value="", visible=False)
     model_id = extract_model_id(model_choice)
-    result = run_test(
-        model_id=model_id,
-        test_input=test_input,
-        policy=current_policy,
-        hf_token=hf_token,
-        reasoning_effort=reasoning_effort,
-        max_tokens=int(max_tokens),
-        temperature=float(temperature),
-        top_p=float(top_p),
-        system_prompt=system_prompt_val,
-        response_format=response_format_val,
-    )
     label_text, parsed, cat_text, reasoning, raw_response = format_test_result(result)
     reasoning_visible = bool(reasoning and reasoning.strip())
     model_info = format_model_info(model_choice, reasoning_effort)
@@ -112,9 +115,13 @@ def handle_run_test(test_input, current_policy, model_choice, reasoning_effort,
                     reasoning, reasoning_effort, max_tokens, temperature, top_p,
                     system_prompt_val, response_format_val
                 )
-                save_to_dataset(get_roost_dataset_repo_id(), org_token, data)
             except Exception as e:
-                print(f"Failed to save to ROOST dataset: {e}")
     elif save_mode == "Save to Private Dataset":
         personal_token, _ = get_personal_token(oauth_token)
         if personal_token:
@@ -124,9 +131,13 @@ def handle_run_test(test_input, current_policy, model_choice, reasoning_effort,
                     reasoning, reasoning_effort, max_tokens, temperature, top_p,
                     system_prompt_val, response_format_val
                 )
-                save_to_dataset(get_dataset_repo_id(personal_token), personal_token, data)
             except Exception as e:
-                print(f"Failed to save to private dataset: {e}")
     return (
         model_info,
@@ -281,7 +292,7 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
         try:
             idx = dropdown_choices_list.index(selected_label)
             if not (0 <= idx < len(cached_examples_list)):
-                return [None] * 15
             example = cached_examples_list[idx]
             policy = example.get("policy", "") or ""
@@ -314,8 +325,10 @@ with gr.Blocks(title="Moderation Model Testing") as demo:
                 gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
                 gr.update(value=reasoning_trace or "", visible=reasoning_visible),
             )
-        except (ValueError, IndexError):
-            return [None] * 15
     example_dropdown.change(
         load_example_from_dataset,

     """Handle test execution."""
     if not test_input or not test_input.strip():
+        raise gr.Error("Please enter test content before running a test.")
     if not current_policy or current_policy == "*No policy loaded*":
+        raise gr.Error("Please load a policy first. Go to the Policy Definition tab to upload or select a policy.")
     # OAuth token is automatically injected by Gradio - we don't pass login_button as input
     # Use inference token (org preferred, falls back to personal)
     hf_token, _ = get_inference_token(oauth_token)
     if hf_token is None:
+        raise gr.Error("Please log in or set tokens to use Inference Providers. Check the sidebar for authentication options.")
     model_id = extract_model_id(model_choice)
+    try:
+        result = run_test(
+            model_id=model_id,
+            test_input=test_input,
+            policy=current_policy,
+            hf_token=hf_token,
+            reasoning_effort=reasoning_effort,
+            max_tokens=int(max_tokens),
+            temperature=float(temperature),
+            top_p=float(top_p),
+            system_prompt=system_prompt_val,
+            response_format=response_format_val,
+        )
+    except gr.Error:
+        raise  # Re-raise Gradio errors
+    except Exception as e:
+        raise gr.Error(f"Unexpected error during model inference: {str(e)}. Please try again.")
     label_text, parsed, cat_text, reasoning, raw_response = format_test_result(result)
     reasoning_visible = bool(reasoning and reasoning.strip())
     model_info = format_model_info(model_choice, reasoning_effort)
                     reasoning, reasoning_effort, max_tokens, temperature, top_p,
                     system_prompt_val, response_format_val
                 )
+                success, message = save_to_dataset(get_roost_dataset_repo_id(), org_token, data)
+                if not success:
+                    raise gr.Error(f"Failed to save to ROOST dataset: {message}. Please check your token permissions.")
+            except gr.Error:
+                raise  # Re-raise Gradio errors
             except Exception as e:
+                raise gr.Error(f"Failed to save to ROOST dataset: {str(e)}. Please check your token permissions and try again.")
     elif save_mode == "Save to Private Dataset":
         personal_token, _ = get_personal_token(oauth_token)
         if personal_token:
                     reasoning, reasoning_effort, max_tokens, temperature, top_p,
                     system_prompt_val, response_format_val
                 )
+                success, message = save_to_dataset(get_dataset_repo_id(personal_token), personal_token, data)
+                if not success:
+                    raise gr.Error(f"Failed to save to private dataset: {message}. Please check your token permissions.")
+            except gr.Error:
+                raise  # Re-raise Gradio errors
             except Exception as e:
+                raise gr.Error(f"Failed to save to private dataset: {str(e)}. Please check your token permissions and try again.")
     return (
         model_info,
         try:
             idx = dropdown_choices_list.index(selected_label)
             if not (0 <= idx < len(cached_examples_list)):
+                raise gr.Warning("Selected example index is out of range. Please refresh the dataset.")
             example = cached_examples_list[idx]
             policy = example.get("policy", "") or ""
                 gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
                 gr.update(value=reasoning_trace or "", visible=reasoning_visible),
             )
+        except gr.Warning:
+            raise  # Re-raise Gradio warnings
+        except (ValueError, IndexError) as e:
+            raise gr.Warning(f"Failed to load example: {str(e)}. Please try selecting a different example or refresh the dataset.")
     example_dropdown.change(
         load_example_from_dataset,

ui/tab_dataset.py CHANGED Viewed

@@ -107,7 +107,7 @@ def build_dataset_tab() -> dict:
                 # Get personal token
                 personal_token, _ = get_personal_token(oauth_token)
                 if personal_token is None:
-                    return gr.update(choices=[]), "*Please log in or set personal token to browse private dataset*", [], []
                 repo_id = get_dataset_repo_id(personal_token)
                 token = personal_token
             else:  # roost
@@ -116,9 +116,14 @@ def build_dataset_tab() -> dict:
                 repo_id = get_roost_dataset_repo_id()
                 token = org_token  # Can be None for public access
-            examples, labels = load_dataset_examples(repo_id, token)
-            if not examples or not labels:
-                return gr.update(choices=[], value=None), "*No examples found in dataset*", [], []
             preview = format_preview_markdown(examples[0])
             return gr.update(choices=labels, value=labels[0]), preview, examples, labels

                 # Get personal token
                 personal_token, _ = get_personal_token(oauth_token)
                 if personal_token is None:
+                    raise gr.Error("Please log in or set a personal token to browse your private dataset. Check the sidebar for authentication options.")
                 repo_id = get_dataset_repo_id(personal_token)
                 token = personal_token
             else:  # roost
                 repo_id = get_roost_dataset_repo_id()
                 token = org_token  # Can be None for public access
+            try:
+                examples, labels = load_dataset_examples(repo_id, token)
+                if not examples or not labels:
+                    raise gr.Error(f"No examples found in dataset '{repo_id}'. Try saving some test results first.")
+            except gr.Error:
+                raise  # Re-raise Gradio errors
+            except Exception as e:
+                raise gr.Error(f"Failed to load dataset '{repo_id}': {str(e)}. Please check your token permissions and try again.")
             preview = format_preview_markdown(examples[0])
             return gr.update(choices=labels, value=labels[0]), preview, examples, labels

ui/tab_policy.py CHANGED Viewed

@@ -14,6 +14,7 @@ def build_policy_tab(base_dir: str) -> dict:
     """Build the policy definition tab UI."""
     with gr.Tab("📋 Policy Definition"):
         current_policy_state = gr.State(value="")
         # Existing Policy Accordion
         with gr.Accordion("📥 Load Existing Policy", open=False):
@@ -45,28 +46,72 @@ def build_policy_tab(base_dir: str) -> dict:
         clear_policy_btn = gr.Button("Clear Policy", variant="secondary")
         # Handlers
-        def load_preset_handler(name):
             if not name:
                 return "", "*No policy loaded*", ""
-            policy_text, _ = load_preset_policy(name, base_dir)
-            return policy_text, policy_text, policy_text
         load_preset_btn.click(
             load_preset_handler,
-            inputs=preset_dropdown,
             outputs=[current_policy_state, manual_text, policy_preview],
         )
-        def load_upload_handler(f):
-            if f:
-                policy_text, _ = load_policy_from_file(f.name)
-                return policy_text, policy_text, policy_text
-            return "", "", "*No policy loaded*"
         upload_file.change(
             load_upload_handler,
-            inputs=upload_file,
-            outputs=[current_policy_state, manual_text, policy_preview],
         )
         def update_preview(text):

     """Build the policy definition tab UI."""
     with gr.Tab("📋 Policy Definition"):
         current_policy_state = gr.State(value="")
+        uploaded_policies_state = gr.State(value={})  # Store uploaded policies: {"Uploaded - filename": content}
         # Existing Policy Accordion
         with gr.Accordion("📥 Load Existing Policy", open=False):
         clear_policy_btn = gr.Button("Clear Policy", variant="secondary")
         # Handlers
+        def load_preset_handler(name, uploaded_policies):
+            """Load the policy selected in the dropdown, from a preset or an earlier upload.
+
+            Args:
+                name: Dropdown label of the policy to load; falsy when nothing is selected.
+                uploaded_policies: Mapping of "Uploaded - <filename>" labels to policy
+                    text, as kept in ``uploaded_policies_state``. Treated as empty
+                    when it is ``None``.
+
+            Returns:
+                A 3-tuple of values for ``current_policy_state``, ``manual_text`` and
+                ``policy_preview``, in that order.
+            """
+            # NOTE(review): on the "nothing loaded" paths the placeholder text is sent
+            # to manual_text and policy_preview is blanked, whereas load_upload_handler
+            # sends the placeholder to policy_preview -- confirm which is intended.
             if not name:
                 return "", "*No policy loaded*", ""
+            # Check presets first, so a preset label always wins over an upload.
+            preset_choices = ["Hate Speech Policy", "Violence Policy", "Toxicity Policy"]
+            if name in preset_choices:
+                policy_text, _ = load_preset_policy(name, base_dir)
+                return policy_text, policy_text, policy_text
+            # Check uploaded policies. Normalise a None state first (the upload
+            # handler guards against it as well) so the membership test cannot
+            # raise TypeError.
+            uploaded_policies = uploaded_policies or {}
+            if name in uploaded_policies:
+                policy_text = uploaded_policies[name]
+                return policy_text, policy_text, policy_text
+            # Label matches neither a preset nor an upload: report nothing loaded.
+            return "", "*No policy loaded*", ""
         load_preset_btn.click(
             load_preset_handler,
+            inputs=[preset_dropdown, uploaded_policies_state],
             outputs=[current_policy_state, manual_text, policy_preview],
         )
+        def load_upload_handler(f, uploaded_policies):
+            """Handle a file upload: load the policy, store it, and update the dropdown.
+
+            Args:
+                f: The uploaded file object (its ``name`` attribute is used as the
+                    path to read), or a falsy value when there is no file.
+                uploaded_policies: Mapping of "Uploaded - <filename>" labels to policy
+                    text from ``uploaded_policies_state``. Treated as empty when
+                    it is ``None``.
+
+            Returns:
+                A 5-tuple of values for ``current_policy_state``, ``manual_text``,
+                ``policy_preview``, ``preset_dropdown`` and
+                ``uploaded_policies_state``, in that order.
+            """
+            # Work on a copy so the dict passed in is never mutated in place; a
+            # missing (None) state is treated as an empty mapping.
+            uploaded_policies = dict(uploaded_policies or {})
+            if not f:
+                # No file to load: reset the policy fields, but hand back the
+                # existing uploads rather than an empty dict so they are not lost.
+                return "", "", "*No policy loaded*", gr.update(), uploaded_policies
+            # Extract filename
+            filename = os.path.basename(f.name)
+            upload_key = f"Uploaded - {filename}"
+            # Load policy content
+            policy_text, _ = load_policy_from_file(f.name)
+            # Check for duplicate BEFORE storing
+            is_duplicate = upload_key in uploaded_policies
+            # Store policy in state (overwrites if duplicate)
+            uploaded_policies[upload_key] = policy_text
+            # Build updated choices: presets + uploaded policies
+            preset_choices = ["Hate Speech Policy", "Violence Policy", "Toxicity Policy"]
+            all_choices = preset_choices + sorted(uploaded_policies)
+            # Show warning if duplicate (gr.Warning is a function, not an exception)
+            if is_duplicate:
+                gr.Warning(f"Policy '{filename}' already uploaded. Previous version overwritten.")
+            return (
+                policy_text,  # current_policy_state
+                policy_text,  # manual_text
+                policy_text,  # policy_preview
+                gr.update(choices=all_choices),  # preset_dropdown
+                uploaded_policies,  # uploaded_policies_state
+            )
         upload_file.change(
             load_upload_handler,
+            inputs=[upload_file, uploaded_policies_state],
+            outputs=[current_policy_state, manual_text, policy_preview, preset_dropdown, uploaded_policies_state],
         )
         def update_preview(text):

utils/helpers.py CHANGED Viewed

@@ -162,13 +162,20 @@ def load_preset_policy(preset_name: str, base_dir: str) -> tuple[str, str]:
                 policy_text = f.read()
             return policy_text, policy_text
         except FileNotFoundError:
-            return f"*Error: Policy file {preset_files[preset_name]} not found*", ""
     return "", ""
 def load_policy_from_file(file_path: str) -> tuple[str, str]:
     """Load policy from uploaded file."""
-    with open(file_path, "r") as f:
-        content = f.read()
-    return content, content

                 policy_text = f.read()
             return policy_text, policy_text
         except FileNotFoundError:
+            raise gr.Error(f"Policy file '{preset_files[preset_name]}' not found at {policy_path}. Please check the file exists.")
+        except Exception as e:
+            raise gr.Error(f"Failed to load policy file '{preset_files[preset_name]}': {str(e)}")
     return "", ""
 def load_policy_from_file(file_path: str) -> tuple[str, str]:
+    """Load policy from uploaded file.
+
+    Args:
+        file_path: Path of the file to read.
+
+    Returns:
+        The file's text twice, as ``(content, content)``.
+
+    Raises:
+        gr.Error: If the file does not exist or cannot be read; the original
+            exception is attached as the cause.
+    """
+    try:
+        # Decode explicitly as UTF-8 so the result does not depend on the
+        # platform's default locale encoding.
+        with open(file_path, "r", encoding="utf-8") as f:
+            content = f.read()
+        return content, content
+    except FileNotFoundError as e:
+        raise gr.Error(f"File not found: {file_path}. Please try uploading the file again.") from e
+    except Exception as e:
+        raise gr.Error(f"Failed to read policy file: {str(e)}. Please check the file format and try again.") from e

utils/model_interface.py CHANGED Viewed

@@ -109,22 +109,37 @@ def run_test(
     response_format: str = RESPONSE_FORMAT,
 ) -> dict:
     """Run test on model."""
     model_info = get_model_info(model_id)
     if not model_info:
-        raise ValueError(f"Unknown model: {model_id}")
     client = OpenAI(base_url=ROUTER_URL, api_key=hf_token)
     messages = make_messages(test_input, policy, model_id, reasoning_effort, system_prompt, response_format)
-    completion = client.chat.completions.create(
-        model=model_id,
-        messages=messages,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        stop=None,
-        extra_headers={"X-HF-Bill-To": "roosttools"},
-    )
     result = {"content": completion.choices[0].message.content}

     response_format: str = RESPONSE_FORMAT,
 ) -> dict:
     """Run test on model."""
+    import gradio as gr
     model_info = get_model_info(model_id)
     if not model_info:
+        raise gr.Error(f"Unknown model: {model_id}. Please select a valid model from the dropdown.")
     client = OpenAI(base_url=ROUTER_URL, api_key=hf_token)
     messages = make_messages(test_input, policy, model_id, reasoning_effort, system_prompt, response_format)
+    try:
+        completion = client.chat.completions.create(
+            model=model_id,
+            messages=messages,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            stop=None,
+            extra_headers={"X-HF-Bill-To": "roosttools"},
+        )
+    except Exception as e:
+        error_msg = str(e)
+        if "401" in error_msg or "authentication" in error_msg.lower():
+            raise gr.Error(f"Authentication failed: {error_msg}. Please check your token permissions.")
+        elif "400" in error_msg or "bad request" in error_msg.lower():
+            raise gr.Error(f"Invalid request: {error_msg}. Please check your input and try again.")
+        elif "429" in error_msg or "rate limit" in error_msg.lower():
+            raise gr.Error(f"Rate limit exceeded: {error_msg}. Please wait a moment and try again.")
+        elif "timeout" in error_msg.lower():
+            raise gr.Error(f"Request timed out: {error_msg}. Please try again with a shorter input or lower max_tokens.")
+        else:
+            raise gr.Error(f"Model inference failed: {error_msg}. Please check your inputs and try again.")
     result = {"content": completion.choices[0].message.content}