"""Main Gradio app for moderation model testing.""" |
|
|
|
|
|
import os |
|
|
import sys |
|
|
|
|
|
import gradio as gr |
|
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) |
|
|
|
|
|
from datetime import datetime |
|
|
|
|
|
from utils.dataset import ( |
|
|
format_categories_and_reasoning, |
|
|
get_dataset_repo_id, |
|
|
get_roost_dataset_repo_id, |
|
|
save_to_dataset, |
|
|
) |
|
|
from utils.helpers import ( |
|
|
check_token_availability, |
|
|
format_dataset_help_text, |
|
|
format_token_status, |
|
|
get_inference_token, |
|
|
get_org_token, |
|
|
get_personal_token, |
|
|
) |
|
|
from utils.model_interface import extract_model_id, run_test |
|
|
from ui.sidebar import build_sidebar |
|
|
from ui.tab_config import build_config_tab |
|
|
from ui.tab_dataset import build_dataset_tab |
|
|
from ui.tab_policy import build_policy_tab |
|
|
from ui.tab_testing import ( |
|
|
build_testing_tab, |
|
|
format_model_info, |
|
|
format_reasoning_info, |
|
|
format_test_result, |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|


def prepare_save_data(test_input, current_policy, parsed, model_choice, raw_response,
                      reasoning, reasoning_effort, max_tokens, temperature, top_p,
                      system_prompt_val, response_format_val):
    """Prepare data dict for saving to dataset."""
    categories_and_reasoning_text = format_categories_and_reasoning(parsed)
    policy_violation = parsed.get("label", -1)

    return {
        "input": test_input,
        "policy_violation": policy_violation,
        "categories_and_reasoning": categories_and_reasoning_text,
        "policy": current_policy,
        "model_selection": model_choice,
        "raw_response": raw_response,
        "reasoning_trace": reasoning or "",
        "reasoning_effort": reasoning_effort or "",
        "max_tokens": int(max_tokens),
        "temperature": float(temperature),
        "top_p": float(top_p),
        "system_prompt": system_prompt_val or "",
        "response_format": response_format_val or "",
        "timestamp": datetime.now().isoformat(),
    }
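
# Illustrative sketch of the record prepare_save_data() produces (the values below
# are made-up examples, not real data):
#
#     {
#         "input": "text that was tested",
#         "policy_violation": 1,            # 1 = violation, 0 = no violation, -1 = undetermined
#         "categories_and_reasoning": "...",
#         "policy": "...",
#         "model_selection": "...",
#         "raw_response": "...",
#         "reasoning_trace": "",
#         "reasoning_effort": "",
#         "max_tokens": 1024,
#         "temperature": 0.0,
#         "top_p": 1.0,
#         "system_prompt": "",
#         "response_format": "",
#         "timestamp": "2024-01-01T00:00:00.000000",
#     }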


def handle_run_test(test_input, current_policy, model_choice, reasoning_effort, max_tokens,
                    temperature, top_p, system_prompt_val, response_format_val, save_mode,
                    oauth_token: gr.OAuthToken | None = None):
    """Handle test execution."""
    if not test_input or not test_input.strip():
        raise gr.Error("Please enter test content before running a test.")

    if not current_policy or current_policy == "*No policy loaded*":
        raise gr.Error("Please load a policy first. Go to the Policy Definition tab to upload or select a policy.")

    hf_token, _ = get_inference_token(oauth_token)
    if hf_token is None:
        raise gr.Error("Please log in or set tokens to use Inference Providers. Check the sidebar for authentication options.")

    model_id = extract_model_id(model_choice)

    try:
        result = run_test(
            model_id=model_id,
            test_input=test_input,
            policy=current_policy,
            hf_token=hf_token,
            reasoning_effort=reasoning_effort,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            system_prompt=system_prompt_val,
            response_format=response_format_val,
        )
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"Unexpected error during model inference: {str(e)}. Please try again.")

    label_text, parsed, cat_text, reasoning, raw_response = format_test_result(result)
    reasoning_visible = bool(reasoning and reasoning.strip())
    model_info = format_model_info(model_choice, reasoning_effort)
    reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_choice, reasoning)
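
    # Optionally persist the result: "Save to ROOST Dataset" writes to the shared ROOST
    # dataset using the org token, "Save to Private Dataset" writes to the user's own
    # dataset using their personal token; any other save mode skips saving.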
    if save_mode == "Save to ROOST Dataset":
        org_token = get_org_token()
        if org_token:
            try:
                data = prepare_save_data(
                    test_input, current_policy, parsed, model_choice, raw_response,
                    reasoning, reasoning_effort, max_tokens, temperature, top_p,
                    system_prompt_val, response_format_val
                )
                success, message = save_to_dataset(get_roost_dataset_repo_id(), org_token, data)
                if not success:
                    raise gr.Error(f"Failed to save to ROOST dataset: {message}. Please check your token permissions.")
            except gr.Error:
                raise
            except Exception as e:
                raise gr.Error(f"Failed to save to ROOST dataset: {str(e)}. Please check your token permissions and try again.")
    elif save_mode == "Save to Private Dataset":
        personal_token, _ = get_personal_token(oauth_token)
        if personal_token:
            try:
                data = prepare_save_data(
                    test_input, current_policy, parsed, model_choice, raw_response,
                    reasoning, reasoning_effort, max_tokens, temperature, top_p,
                    system_prompt_val, response_format_val
                )
                success, message = save_to_dataset(get_dataset_repo_id(personal_token), personal_token, data)
                if not success:
                    raise gr.Error(f"Failed to save to private dataset: {message}. Please check your token permissions.")
            except gr.Error:
                raise
            except Exception as e:
                raise gr.Error(f"Failed to save to private dataset: {str(e)}. Please check your token permissions and try again.")

    return (
        model_info,
        label_text,
        cat_text,
        raw_response,
        gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
        gr.update(value=reasoning or "", visible=reasoning_visible),
    )


with gr.Blocks(title="Moderation Model Testing") as demo:
    gr.Markdown("# Moderation Model Testing Interface")
    gr.Markdown(
        "Test moderation models with custom content policies. Define your policy, select a model, "
        "and evaluate how different models classify content according to your rules. "
        "Supports reasoning models that provide detailed explanations for their decisions."
    )
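
    # Sidebar: Hugging Face login button plus a readout of which tokens are available.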
    sidebar_components = build_sidebar()
    login_button = sidebar_components["login_button"]
    token_status_markdown = sidebar_components["token_status"]
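
    # Tabs built by the ui package: testing, policy definition, model configuration,
    # and dataset browsing.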
    with gr.Tabs():
        testing_components = build_testing_tab()
        test_input = testing_components["test_input"]
        run_test_btn = testing_components["run_test_btn"]
        save_mode = testing_components["save_mode"]
        save_mode_help = testing_components["save_mode_help"]
        model_info_display = testing_components["model_info_display"]
        label_display = testing_components["label_display"]
        categories_display = testing_components["categories_display"]
        model_response_display = testing_components["model_response_display"]
        reasoning_info = testing_components["reasoning_info"]
        reasoning_display = testing_components["reasoning_display"]

        policy_components = build_policy_tab(os.path.dirname(__file__))
        current_policy_state = policy_components["current_policy_state"]

        config_components = build_config_tab()
        model_dropdown = config_components["model_dropdown"]
        reasoning_effort = config_components["reasoning_effort"]
        max_tokens = config_components["max_tokens"]
        temperature = config_components["temperature"]
        top_p = config_components["top_p"]
        system_prompt_textbox = config_components["system_prompt_textbox"]
        response_format_textbox = config_components["response_format_textbox"]

        dataset_components = build_dataset_tab()
        example_dropdown = dataset_components["example_dropdown"]
        cached_examples = dataset_components["cached_examples"]
        dropdown_choices_state = dataset_components["dropdown_choices_state"]
        refresh_private_btn = dataset_components["refresh_private_btn"]
        refresh_roost_btn = dataset_components["refresh_roost_btn"]
        dataset_help_text = dataset_components["dataset_help_text"]
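
    # Event wiring: connect the controls above to the handlers defined at module level
    # and to the helper functions defined inline below.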
    run_test_btn.click(
        handle_run_test,
        inputs=[
            test_input,
            current_policy_state,
            model_dropdown,
            reasoning_effort,
            max_tokens,
            temperature,
            top_p,
            system_prompt_textbox,
            response_format_textbox,
            save_mode,
        ],
        outputs=[
            model_info_display,
            label_display,
            categories_display,
            model_response_display,
            reasoning_info,
            reasoning_display,
        ],
    )

    model_dropdown.change(
        format_model_info,
        inputs=[model_dropdown, reasoning_effort],
        outputs=model_info_display,
    )

    reasoning_effort.change(
        format_model_info,
        inputs=[model_dropdown, reasoning_effort],
        outputs=model_info_display,
    )
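
    # Logging in (and the initial page load) refreshes every token-dependent widget:
    # token status, save-mode help, the dataset refresh buttons, and the dataset help text.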
    def handle_login_click(oauth_token: gr.OAuthToken | None = None):
        """Handle login button click and update all token-dependent UI."""
        from ui.tab_testing import format_save_mode_help

        has_personal, has_org = check_token_availability(oauth_token)

        return (
            format_token_status(oauth_token),
            format_save_mode_help(has_personal, has_org),
            gr.update(interactive=has_personal),
            gr.update(interactive=True),
            format_dataset_help_text(has_personal, has_org),
        )

    login_button.click(
        handle_login_click,
        inputs=None,
        outputs=[
            token_status_markdown,
            save_mode_help,
            refresh_private_btn,
            refresh_roost_btn,
            dataset_help_text,
        ],
    )

    demo.load(
        handle_login_click,
        inputs=None,
        outputs=[
            token_status_markdown,
            save_mode_help,
            refresh_private_btn,
            refresh_roost_btn,
            dataset_help_text,
        ],
    )
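
    # Selecting a saved example repopulates both the input/config widgets and the result
    # displays; the 15 values returned below line up with the outputs list registered on
    # example_dropdown.change further down.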
    def load_example_from_dataset(selected_label, cached_examples_list, dropdown_choices_list):
        """Load example from dataset and populate all fields."""
        if not (cached_examples_list and selected_label and dropdown_choices_list and
                selected_label in dropdown_choices_list):
            return [None] * 15

        try:
            idx = dropdown_choices_list.index(selected_label)
            if not (0 <= idx < len(cached_examples_list)):
                # gr.Warning shows a toast notification; it is not an exception class,
                # so warn and return empty outputs instead of raising it.
                gr.Warning("Selected example index is out of range. Please refresh the dataset.")
                return [None] * 15

            example = cached_examples_list[idx]
            policy = example.get("policy", "") or ""
            policy_violation = example.get("policy_violation", -1)
            model_selection = example.get("model_selection", "")
            reasoning_effort_val = example.get("reasoning_effort", "")
            reasoning_trace = example.get("reasoning_trace", "")

            emoji = "❌" if policy_violation == 1 else "✅" if policy_violation == 0 else "⚠️"
            label_text = f"## {emoji} {'Policy Violation Detected' if policy_violation == 1 else 'No Policy Violation' if policy_violation == 0 else 'Unable to determine label'}"

            reasoning_info_text, reasoning_info_visible = format_reasoning_info(model_selection, reasoning_trace)
            reasoning_visible = bool(reasoning_trace and reasoning_trace.strip())

            return (
                example.get("input", ""),
                policy,
                example.get("model_selection", ""),
                reasoning_effort_val,
                example.get("max_tokens", 0),
                example.get("temperature", 0.0),
                example.get("top_p", 0.0),
                example.get("system_prompt", ""),
                example.get("response_format", ""),
                format_model_info(model_selection, reasoning_effort_val),
                label_text,
                example.get("categories_and_reasoning", ""),
                example.get("raw_response", ""),
                gr.update(value=reasoning_info_text, visible=reasoning_info_visible),
                gr.update(value=reasoning_trace or "", visible=reasoning_visible),
            )
        except (ValueError, IndexError) as e:
            # As above, gr.Warning is a notification helper rather than an exception,
            # so warn the user and fall back to empty outputs.
            gr.Warning(f"Failed to load example: {str(e)}. Please try selecting a different example or refresh the dataset.")
            return [None] * 15

    example_dropdown.change(
        load_example_from_dataset,
        inputs=[example_dropdown, cached_examples, dropdown_choices_state],
        outputs=[
            test_input,
            current_policy_state,
            model_dropdown,
            reasoning_effort,
            max_tokens,
            temperature,
            top_p,
            system_prompt_textbox,
            response_format_textbox,
            model_info_display,
            label_display,
            categories_display,
            model_response_display,
            reasoning_info,
            reasoning_display,
        ],
    )


if __name__ == "__main__":
    demo.launch(ssr_mode=False)