llm_moderation_testing

Running

File size: 16,919 Bytes

"""Main Gradio app for moderation model testing."""

import os
import sys

import gradio as gr

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from datetime import datetime

from utils.dataset import (
    format_categories_and_reasoning,
    get_dataset_repo_id,
    get_roost_dataset_repo_id,
    save_to_dataset,
)
from utils.helpers import (
    check_token_availability,
    format_token_status,
    get_inference_token,
    get_org_token,
    get_personal_token,
)
from utils.model_interface import extract_model_id, run_test
from ui.sidebar import build_sidebar
from ui.tab_config import build_config_tab
from ui.tab_dataset import build_dataset_tab
from ui.tab_policy import build_policy_tab
from ui.tab_testing import (
    build_testing_tab,
    format_model_info,
    format_reasoning_info,
    format_test_result,
)


# ============================================================================
# Handlers
# ============================================================================

_SAVE_MODE_ROOST = "Save to ROOST Dataset"
_SAVE_MODE_PRIVATE = "Save to Private Dataset"


def _blocked_test_result(model_choice, reasoning_effort, label_message, categories_message):
    """Build the six handler outputs for a run refused before the model is called.

    The model-info panel is still rendered for the current selection, the label
    and categories panels carry the given messages, and both reasoning
    components are emptied and hidden.
    """
    return (
        format_model_info(model_choice, reasoning_effort),
        label_message,
        categories_message,
        "*No response yet*",
        gr.update(value="", visible=False),
        gr.update(value="", visible=False),
    )


def _build_dataset_record(
    *,
    test_input,
    current_policy,
    model_choice,
    parsed,
    raw_response,
    reasoning,
    reasoning_effort,
    max_tokens,
    temperature,
    top_p,
    system_prompt_val,
    response_format_val,
):
    """Assemble the row stored for one test run (same schema for both save modes)."""
    categories_and_reasoning_text = format_categories_and_reasoning(parsed)
    policy_violation = parsed.get("label", -1)  # -1 when the parsed result has no label

    return {
        "input": test_input,
        "policy_violation": policy_violation,
        "categories_and_reasoning": categories_and_reasoning_text,
        "policy": current_policy,
        "model_selection": model_choice,
        "raw_response": raw_response,
        "reasoning_trace": reasoning or "",
        "reasoning_effort": reasoning_effort or "",
        "max_tokens": int(max_tokens),
        "temperature": float(temperature),
        "top_p": float(top_p),
        "system_prompt": system_prompt_val or "",
        "response_format": response_format_val or "",
        "timestamp": datetime.now().isoformat(),
    }


def _save_test_record(save_mode, oauth_token, **record_fields):
    """Best-effort save of one test run to the dataset selected by ``save_mode``.

    Does nothing for any other save mode. A missing token or a failure while
    building/saving the record is logged and never raised, so a storage problem
    cannot break the test run that was just shown to the user.
    """
    if save_mode == _SAVE_MODE_ROOST:
        token = get_org_token()
        target = "ROOST dataset"
    elif save_mode == _SAVE_MODE_PRIVATE:
        token, _ = get_personal_token(oauth_token)
        target = "private dataset"
    else:
        return

    if not token:
        # Previously this case was skipped without any trace.
        print(f"Skipping save to {target}: no token available")
        return

    try:
        record = _build_dataset_record(**record_fields)
        if save_mode == _SAVE_MODE_ROOST:
            repo_id = get_roost_dataset_repo_id()
        else:
            repo_id = get_dataset_repo_id(token)
        save_to_dataset(repo_id, token, record)
    except Exception as e:
        # Log error but don't break test execution
        print(f"Failed to save to {target}: {e}")


def handle_run_test(test_input, current_policy, model_choice, reasoning_effort, max_tokens, temperature, top_p, system_prompt_val, response_format_val, save_mode, oauth_token: gr.OAuthToken | None = None):
    """Run one moderation test and return the values for the result widgets.

    Args:
        test_input: Content to classify; blank input is rejected.
        current_policy: Policy text; empty or the "*No policy loaded*"
            placeholder is rejected.
        model_choice: Model selection as shown in the UI; the model id is
            derived from it with ``extract_model_id``.
        reasoning_effort: Reasoning effort setting forwarded to the model call.
        max_tokens: Generation limit, coerced with ``int``.
        temperature: Sampling temperature, coerced with ``float``.
        top_p: Nucleus sampling value, coerced with ``float``.
        system_prompt_val: System prompt forwarded to the model call.
        response_format_val: Response format forwarded to the model call.
        save_mode: "Save to ROOST Dataset" or "Save to Private Dataset" to
            persist the run; any other value skips saving.
        oauth_token: Not wired as a Gradio input; it is expected to be injected
            by Gradio based on the ``gr.OAuthToken`` annotation.

    Returns:
        A 6-tuple: model info, label text, categories text, raw model response,
        an update for the reasoning-info component and an update for the
        reasoning display.
    """
    if not test_input or not test_input.strip():
        return _blocked_test_result(
            model_choice, reasoning_effort, "*Please enter test content*", "*No content*"
        )

    if not current_policy or current_policy == "*No policy loaded*":
        return _blocked_test_result(
            model_choice, reasoning_effort, "*Please load a policy first*", "*No policy*"
        )

    # Per the original note: the inference token prefers the org token and
    # falls back to the personal one (logic lives in get_inference_token).
    hf_token, _ = get_inference_token(oauth_token)
    if hf_token is None:
        return _blocked_test_result(
            model_choice,
            reasoning_effort,
            "*Please log in or set tokens to use Inference Providers*",
            "*Authentication required*",
        )

    model_id = extract_model_id(model_choice)

    result = run_test(
        model_id=model_id,
        test_input=test_input,
        policy=current_policy,
        hf_token=hf_token,
        reasoning_effort=reasoning_effort,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        system_prompt=system_prompt_val,
        response_format=response_format_val,
    )
    label_text, parsed, cat_text, reasoning, raw_response = format_test_result(result)
    # The reasoning panel is only shown when the model produced a non-blank trace.
    reasoning_visible = bool(reasoning and reasoning.strip())
    model_info = format_model_info(model_choice, reasoning_effort)
    reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_choice, reasoning)

    # Save to dataset if enabled (never raises on storage failures)
    _save_test_record(
        save_mode,
        oauth_token,
        test_input=test_input,
        current_policy=current_policy,
        model_choice=model_choice,
        parsed=parsed,
        raw_response=raw_response,
        reasoning=reasoning,
        reasoning_effort=reasoning_effort,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        system_prompt_val=system_prompt_val,
        response_format_val=response_format_val,
    )

    return (
        model_info,
        label_text,
        cat_text,
        raw_response,
        gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
        gr.update(value=reasoning or "", visible=reasoning_visible),
    )


# ============================================================================
# UI Components
# ============================================================================

# Build the whole UI inside a single Blocks context bound to `demo`.
# Component creation order below is significant: it is the order in which the
# sidebar and the tabs are constructed.
with gr.Blocks(title="Moderation Model Testing") as demo:
    # Page heading and introductory blurb.
    gr.Markdown("# Moderation Model Testing Interface")
    gr.Markdown(
        "Test moderation models with custom content policies. Define your policy, select a model, "
        "and evaluate how different models classify content according to your rules. "
        "Supports reasoning models that provide detailed explanations for their decisions."
    )

    # Sidebar (collapsible)
    # build_sidebar() returns a mapping of components; only the login button
    # and the token-status component are needed by the handlers below.
    sidebar_components = build_sidebar()
    login_button = sidebar_components["login_button"]
    token_status_markdown = sidebar_components["token_status"]

    # Main content area with tabs
    with gr.Tabs():
                # NOTE: these statements are indented deeper than usual but are
                # simply the body of `with gr.Tabs()`.
                # Build tabs
                # Each build_*_tab() call returns a mapping of the components it
                # created; the ones needed for cross-tab wiring are pulled out
                # into names used by the event handlers further down.

                # Testing tab: input, run button, save-mode controls and result panels.
                testing_components = build_testing_tab()
                test_input = testing_components["test_input"]
                run_test_btn = testing_components["run_test_btn"]
                save_mode = testing_components["save_mode"]
                save_mode_help = testing_components["save_mode_help"]
                model_info_display = testing_components["model_info_display"]
                label_display = testing_components["label_display"]
                categories_display = testing_components["categories_display"]
                model_response_display = testing_components["model_response_display"]
                reasoning_info = testing_components["reasoning_info"]
                reasoning_display = testing_components["reasoning_display"]

                # Policy tab: receives this file's directory; only the policy
                # state component is used outside the tab.
                policy_components = build_policy_tab(os.path.dirname(__file__))
                current_policy_state = policy_components["current_policy_state"]

                # Configuration tab: model selection and generation parameters.
                config_components = build_config_tab()
                model_dropdown = config_components["model_dropdown"]
                reasoning_effort = config_components["reasoning_effort"]
                max_tokens = config_components["max_tokens"]
                temperature = config_components["temperature"]
                top_p = config_components["top_p"]
                system_prompt_textbox = config_components["system_prompt_textbox"]
                response_format_textbox = config_components["response_format_textbox"]

                # Dataset tab: example picker, its backing state and refresh buttons.
                dataset_components = build_dataset_tab()
                example_dropdown = dataset_components["example_dropdown"]
                cached_examples = dataset_components["cached_examples"]
                dropdown_choices_state = dataset_components["dropdown_choices_state"]
                refresh_private_btn = dataset_components["refresh_private_btn"]
                refresh_roost_btn = dataset_components["refresh_roost_btn"]
                dataset_help_text = dataset_components["dataset_help_text"]

    # ============================================================================
    # Event Handlers
    # ============================================================================

    # Run-test wiring. The handler reads widgets created in the testing,
    # policy and configuration tabs and writes the six result components.
    # The OAuth token is not listed as an input; handle_run_test declares it
    # as an annotated keyword parameter instead.
    run_test_btn.click(
        fn=handle_run_test,
        inputs=[
            test_input, current_policy_state, model_dropdown, reasoning_effort,
            max_tokens, temperature, top_p,
            system_prompt_textbox, response_format_textbox,
            save_mode,
        ],
        outputs=[
            model_info_display, label_display, categories_display,
            model_response_display, reasoning_info, reasoning_display,
        ],
    )

    # Refresh the model-info panel when the selected model changes ...
    model_dropdown.change(
        fn=format_model_info, inputs=[model_dropdown, reasoning_effort], outputs=model_info_display
    )

    # ... and likewise when the reasoning effort changes.
    reasoning_effort.change(
        fn=format_model_info, inputs=[model_dropdown, reasoning_effort], outputs=model_info_display
    )
    
    # Token status update handler
    def update_token_status(oauth_token: gr.OAuthToken | None = None):
        """Return the token-status text for the given (possibly absent) OAuth token."""
        status_text = format_token_status(oauth_token)
        return status_text
    
    # Save mode help text update handler
    def update_save_mode_help(oauth_token: gr.OAuthToken | None = None):
        """Return the save-mode help text for the tokens currently available."""
        # Imported at call time, exactly as the original handler did.
        from ui.tab_testing import format_save_mode_help

        availability = check_token_availability(oauth_token)
        personal_ok, org_ok = availability
        return format_save_mode_help(personal_ok, org_ok)
    
    # Dataset button state update handler
    def _dataset_controls_update(has_personal, has_org):
        """Build the (private button, ROOST button, help text) outputs for the dataset tab.

        Shared by update_dataset_button_states and handle_login_click so the
        help wording and button rules are defined in exactly one place.
        """
        private_status = '✅ Available' if has_personal else '❌ Requires personal token (OAuth login or .env)'
        roost_status = '✅ Available' if has_org else '⚠️ Can load if public, requires org token to save'
        help_text = (
            f"*Private Dataset: {private_status}*\n"
            f"*ROOST Dataset: {roost_status}*"
        )

        return (
            gr.update(interactive=has_personal),  # refresh_private_btn
            gr.update(interactive=True),  # refresh_roost_btn (can load if public)
            help_text,  # dataset_help_text
        )

    def update_dataset_button_states(oauth_token: gr.OAuthToken | None = None):
        """Update dataset button states and help text based on token availability.

        Returns:
            A 3-tuple: update for the private refresh button (interactive only
            with a personal token), update for the ROOST refresh button (always
            interactive), and the dataset help text.
        """
        has_personal, has_org = check_token_availability(oauth_token)
        return _dataset_controls_update(has_personal, has_org)

    # Combined handler for login button click - updates all token-dependent UI
    def handle_login_click(oauth_token: gr.OAuthToken | None = None):
        """Handle login button click and update all token-dependent UI.

        Returns:
            A 5-tuple matching the login button's outputs: token status text,
            save-mode help text, private refresh button update, ROOST refresh
            button update, and dataset help text.
        """
        token_status = format_token_status(oauth_token)

        from ui.tab_testing import format_save_mode_help
        # Token availability is checked once and reused for every output.
        has_personal, has_org = check_token_availability(oauth_token)
        save_help = format_save_mode_help(has_personal, has_org)

        return (
            token_status,  # token_status_markdown
            save_help,  # save_mode_help
            *_dataset_controls_update(has_personal, has_org),
        )
    
    # Clicking the login button refreshes every token-dependent component.
    # No inputs are wired: the handler takes only the OAuth token, which it
    # declares as an annotated keyword parameter.
    login_button.click(
        fn=handle_login_click,
        inputs=None,
        outputs=[
            token_status_markdown, save_mode_help,
            refresh_private_btn, refresh_roost_btn,
            dataset_help_text,
        ],
    )
    
    # Dataset load handler
    def load_example_from_dataset(selected_label, cached_examples_list, dropdown_choices_list):
        """Load a saved example and populate the input, config and result widgets.

        Args:
            selected_label: Label currently selected in the example dropdown.
            cached_examples_list: Cached example records; indexed in parallel
                with ``dropdown_choices_list``.
            dropdown_choices_list: Labels offered by the dropdown.

        Returns:
            A 15-tuple, one entry per output wired to the dropdown's change
            event: test input, policy, model selection, reasoning effort,
            max tokens, temperature, top_p, system prompt, response format,
            model info, label text, categories text, raw response, and updates
            for the reasoning-info and reasoning components. When nothing can
            be loaded, every entry is a no-op update so the UI keeps its
            current contents.
        """
        # Must equal the number of outputs wired to example_dropdown.change.
        output_count = 15

        def _leave_unchanged():
            # Returning None would be applied as a new (empty) value and wipe
            # the widgets; an argument-less gr.update() leaves them untouched.
            return tuple(gr.update() for _ in range(output_count))

        if (not cached_examples_list or not selected_label or
                not dropdown_choices_list or selected_label not in dropdown_choices_list):
            return _leave_unchanged()

        try:
            # Find index by matching label
            idx = dropdown_choices_list.index(selected_label)
            if idx >= len(cached_examples_list):
                # Choices and cache are out of sync; nothing to load.
                return _leave_unchanged()

            example = cached_examples_list[idx]

            # Get policy - ensure it's a string (not None)
            policy = example.get("policy", "") or ""

            # Extract saved results
            policy_violation = example.get("policy_violation", -1)
            categories_and_reasoning = example.get("categories_and_reasoning", "")
            raw_response = example.get("raw_response", "")
            reasoning_trace = example.get("reasoning_trace", "")
            model_selection = example.get("model_selection", "")
            reasoning_effort_val = example.get("reasoning_effort", "")

            # Format label text
            if policy_violation == 1:
                label_text = "## ❌ Policy Violation Detected"
            elif policy_violation == 0:
                label_text = "## ✅ No Policy Violation"
            else:
                label_text = "## ⚠️ Unable to determine label"

            # Format model info
            model_info = format_model_info(model_selection, reasoning_effort_val)

            # Format reasoning info
            reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_selection, reasoning_trace)

            # Only show the reasoning panel when a non-blank trace was saved.
            reasoning_visible = bool(reasoning_trace and reasoning_trace.strip())

            return (
                example.get("input", ""),
                policy,  # current_policy_state
                model_selection,
                reasoning_effort_val,
                example.get("max_tokens", 0),
                example.get("temperature", 0.0),
                example.get("top_p", 0.0),
                example.get("system_prompt", ""),
                example.get("response_format", ""),
                # Results
                model_info,
                label_text,
                categories_and_reasoning,
                raw_response,
                gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
                gr.update(value=reasoning_trace or "", visible=reasoning_visible),
            )
        except (ValueError, IndexError):
            # Best-effort: a malformed example must not disturb the current UI.
            return _leave_unchanged()
    
    # Selecting a saved example fills the test input, the policy state, the
    # configuration widgets and the result panels in one go. The outputs list
    # has 15 entries, in the same order as the handler's return tuple.
    example_dropdown.change(
        fn=load_example_from_dataset,
        inputs=[example_dropdown, cached_examples, dropdown_choices_state],
        outputs=[
            # Inputs and configuration
            test_input,
            # Per the original note, the policy UI is expected to follow this
            # state through a change handler defined outside this section.
            current_policy_state,
            model_dropdown, reasoning_effort,
            max_tokens, temperature, top_p,
            system_prompt_textbox, response_format_textbox,
            # Results
            model_info_display, label_display, categories_display,
            model_response_display,
            reasoning_info, reasoning_display,
        ],
    )


# Launch the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    # NOTE(review): ssr_mode=False presumably disables Gradio's server-side
    # rendering - confirm against the installed Gradio version.
    demo.launch(ssr_mode=False)