"""Testing tab UI components.""" import os import sys import gradio as gr sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import json from utils.constants import MODELS, TEST_EXAMPLES from utils.model_interface import extract_model_id, get_model_info from utils.helpers import check_token_availability def parse_json_response(response: str) -> dict: """Parse JSON response, handling code blocks.""" response = response.strip() try: if "```json" in response: response = response.split("```json")[1].split("```")[0] elif "```" in response: response = response.split("```")[1].split("```")[0] return json.loads(response) except json.JSONDecodeError: return {"label": -1, "categories": []} def format_model_info(model_choice, reasoning_effort) -> str: """Format model information markdown.""" if not model_choice: return "*Select a model in Configuration tab*" model_id = extract_model_id(model_choice) if not model_id: return "*Select a model in Configuration tab*" model_info = get_model_info(model_id) if not model_info: return f"*Model: {model_id}*" model_name = model_info.get("name", model_id) is_thinking = model_info.get("is_thinking", False) supports_reasoning_level = model_info.get("supports_reasoning_level", False) # Handle None or invalid reasoning_effort reasoning_effort_val = reasoning_effort if reasoning_effort else "Low" info_lines = [ f"**Model:** {model_name}", f"- **Thinking Model:** {'Yes' if is_thinking else 'No'}", f"- **Supports Reasoning Level:** {'Yes' if supports_reasoning_level else 'No'}", ] if supports_reasoning_level: info_lines.append(f"- **Reasoning Effort:** {reasoning_effort_val}") return "\n".join(info_lines) def format_reasoning_info(model_choice, reasoning_text) -> tuple[str, bool]: """Format reasoning info markdown and visibility.""" if not model_choice: return "", False model_id = extract_model_id(model_choice) model_info = get_model_info(model_id) if not model_info: return "", False is_thinking = model_info.get("is_thinking", False) # For non-thinking models, always show the message if not is_thinking: return "*This model does not provide reasoning traces.*", True # For thinking models, only show info if there's no reasoning text if not reasoning_text or not reasoning_text.strip(): return "", False return "", False def format_save_mode_help(has_personal: bool, has_org: bool) -> str: """ Format help text explaining save mode options. Args: has_personal: Whether personal token is available has_org: Whether org token is available Returns: Help text string """ lines = [] if not has_personal and not has_org: lines.append("*⚠️ No tokens available. Please log in or set tokens to save results.*") else: if has_org: lines.append("*✅ ROOST Dataset: Available (org token set)*") else: lines.append("*❌ ROOST Dataset: Requires org token (HACKATHON_INFERENCE_TOKEN)*") if has_personal: lines.append("*✅ Private Dataset: Available (personal token set)*") else: lines.append("*❌ Private Dataset: Requires personal token (OAuth login or .env)*") return "\n".join(lines) def format_test_result(result: dict) -> tuple[str, dict, str, str, str]: """ Format test result for display. Returns: Tuple of (label_text, parsed_json, categories_text, reasoning_text, raw_response) """ raw_content = result.get("content", "") parsed = parse_json_response(raw_content) label = parsed.get("label", -1) categories = parsed.get("categories", []) label_text = ( "## ❌ Policy Violation Detected" if label == 1 else "## ✅ No Policy Violation" if label == 0 else "## ⚠️ Unable to determine label" ) if categories and len(categories) > 0: cat_text = "### Categories:\n\n" for cat in categories: category_name = cat.get('category', 'Unknown') reasoning_text = cat.get('reasoning', 'No reasoning provided') policy_source = cat.get('policy_source', '') cat_text += f"- **Category:** {category_name}\n" cat_text += f" - **Explanation:** {reasoning_text}\n" if policy_source: cat_text += f" - **Policy Source:** {policy_source}\n" cat_text += "\n\n" else: cat_text = "*No categories found in response*\n\n" cat_text += "This output expects a valid JSON response, as specified for example in the default prompt.\n\n" cat_text += "The raw response can be seen in the Model Response section below." reasoning = result.get("reasoning", "") # Format raw response for display raw_response_text = f"```\n{raw_content}\n```" return label_text, parsed, cat_text, reasoning or "", raw_response_text def build_testing_tab() -> dict: """Build the testing tab UI and set up simple handlers.""" with gr.Tab("🧪 Testing"): with gr.Row(): with gr.Column(scale=1): gr.Markdown("### Input") with gr.Group(): test_input = gr.Textbox(label="Test Content", placeholder="Enter content to test...", lines=5) example_dropdown = gr.Dropdown(label="Load Example", choices=list(TEST_EXAMPLES.keys()), value=None) load_example_btn = gr.Button("Load Example", variant="secondary") run_test_btn = gr.Button("Run Test", variant="primary") save_mode = gr.Radio( label="Save to Dataset", choices=["Don't Save", "Save to ROOST Dataset", "Save to Private Dataset"], value="Don't Save" ) # Initialize help text based on token availability has_personal, has_org = check_token_availability(None) save_mode_help = gr.Markdown( value=format_save_mode_help(has_personal, has_org), visible=True ) # Initialize with default model info initial_model = f"{MODELS[0]['name']} ({MODELS[0]['id']})" initial_info_lines = [ f"**Model:** {MODELS[0]['name']}", f"- **Thinking Model:** {'Yes' if MODELS[0]['is_thinking'] else 'No'}", f"- **Supports Reasoning Level:** {'Yes' if MODELS[0]['supports_reasoning_level'] else 'No'}", ] if MODELS[0]['supports_reasoning_level']: initial_info_lines.append("- **Reasoning Effort:** Low") model_info_display = gr.Markdown(value="\n".join(initial_info_lines)) with gr.Column(scale=2): gr.Markdown("### Results") label_display = gr.Markdown(value="*Run a test to see results*") with gr.Accordion("Categories & Reasoning", open=True): categories_display = gr.Markdown(value="*No categories yet*") with gr.Accordion("Model Response", open=False): model_response_display = gr.Markdown(value="*No response yet*") with gr.Accordion("Reasoning Trace", open=False): reasoning_info = gr.Markdown(value="", visible=False) reasoning_display = gr.Code(label="", language=None, value="", visible=False) # Simple handlers that don't need cross-tab coordination load_example_btn.click( lambda name: TEST_EXAMPLES.get(name, ""), inputs=example_dropdown, outputs=test_input, ) return { "test_input": test_input, "example_dropdown": example_dropdown, "load_example_btn": load_example_btn, "run_test_btn": run_test_btn, "save_mode": save_mode, "save_mode_help": save_mode_help, "model_info_display": model_info_display, "label_display": label_display, "categories_display": categories_display, "model_response_display": model_response_display, "reasoning_info": reasoning_info, "reasoning_display": reasoning_display, }