"""Testing tab UI components."""
import os
import sys
import gradio as gr
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import json
from utils.constants import MODELS, TEST_EXAMPLES
from utils.model_interface import extract_model_id, get_model_info
from utils.helpers import check_token_availability
def parse_json_response(response: str) -> dict:
    """Parse a model response as JSON, stripping Markdown code fences.

    Args:
        response: Raw model output, possibly wrapped in ```json ... ```
            or bare ``` ... ``` fences.

    Returns:
        The parsed JSON object. Falls back to ``{"label": -1, "categories": []}``
        when the response is not valid JSON, or is valid JSON but not an
        object (e.g. a bare list or string), so callers can always use
        ``.get()`` on the result.
    """
    response = response.strip()
    try:
        # Prefer the explicit ```json fence; fall back to a generic fence.
        if "```json" in response:
            response = response.split("```json")[1].split("```")[0]
        elif "```" in response:
            response = response.split("```")[1].split("```")[0]
        parsed = json.loads(response)
    except json.JSONDecodeError:
        return {"label": -1, "categories": []}
    # Guard against top-level non-objects: callers do parsed.get(...), which
    # would raise AttributeError on a list/str/number.
    if not isinstance(parsed, dict):
        return {"label": -1, "categories": []}
    return parsed
def format_model_info(model_choice, reasoning_effort) -> str:
    """Render a Markdown summary of the currently selected model.

    Args:
        model_choice: Display string from the model dropdown, or falsy if
            nothing is selected.
        reasoning_effort: Selected reasoning-effort level; falsy values
            default to "Low".

    Returns:
        Markdown text describing the model, or a placeholder prompt when
        no model can be resolved.
    """
    placeholder = "*Select a model in Configuration tab*"
    if not model_choice:
        return placeholder
    model_id = extract_model_id(model_choice)
    if not model_id:
        return placeholder

    info = get_model_info(model_id)
    if not info:
        # Unknown model id: show the bare id rather than nothing.
        return f"*Model: {model_id}*"

    thinking = info.get("is_thinking", False)
    has_levels = info.get("supports_reasoning_level", False)

    lines = [
        f"**Model:** {info.get('name', model_id)}",
        f"- **Thinking Model:** {'Yes' if thinking else 'No'}",
        f"- **Supports Reasoning Level:** {'Yes' if has_levels else 'No'}",
    ]
    if has_levels:
        # Treat None/empty reasoning_effort as the "Low" default.
        lines.append(f"- **Reasoning Effort:** {reasoning_effort or 'Low'}")
    return "\n".join(lines)
def format_reasoning_info(model_choice, reasoning_text) -> tuple[str, bool]:
    """Format the reasoning-trace info banner and its visibility flag.

    Args:
        model_choice: Display string from the model dropdown, or falsy.
        reasoning_text: The reasoning trace text, if any. Currently unused
            for thinking models (the banner is always hidden for them) but
            kept in the signature for handler compatibility.

    Returns:
        Tuple of (markdown_text, visible). Non-thinking models get a
        permanent "no reasoning traces" notice; in every other case the
        banner is empty and hidden.
    """
    if not model_choice:
        return "", False
    model_info = get_model_info(extract_model_id(model_choice))
    if not model_info:
        return "", False
    # Non-thinking models never produce a trace: show a standing notice.
    if not model_info.get("is_thinking", False):
        return "*This model does not provide reasoning traces.*", True
    # Thinking model: the trace itself is shown elsewhere, so the info
    # banner stays hidden regardless of reasoning_text. (The original
    # checked reasoning_text here, but both branches returned the same
    # value — the check was dead code.)
    return "", False
def format_save_mode_help(has_personal: bool, has_org: bool) -> str:
    """Build the help text explaining which save destinations are usable.

    Args:
        has_personal: Whether a personal token is available.
        has_org: Whether an org token is available.

    Returns:
        Markdown help text, one status line per destination, or a single
        warning line when no tokens are available at all.
    """
    if not (has_personal or has_org):
        return "*⚠️ No tokens available. Please log in or set tokens to save results.*"

    roost_status = (
        "*✅ ROOST Dataset: Available (org token set)*"
        if has_org
        else "*❌ ROOST Dataset: Requires org token (HACKATHON_INFERENCE_TOKEN)*"
    )
    private_status = (
        "*✅ Private Dataset: Available (personal token set)*"
        if has_personal
        else "*❌ Private Dataset: Requires personal token (OAuth login or .env)*"
    )
    return f"{roost_status}\n{private_status}"
def format_test_result(result: dict) -> tuple[str, dict, str, str, str]:
    """Format an inference result for display in the Results panel.

    Args:
        result: Raw inference result with a "content" key (model text) and
            optionally a "reasoning" key (thinking trace).

    Returns:
        Tuple of (label_text, parsed_json, categories_text, reasoning_text,
        raw_response).
    """
    raw_content = result.get("content", "")
    parsed = parse_json_response(raw_content)
    label = parsed.get("label", -1)
    categories = parsed.get("categories", [])

    if label == 1:
        label_text = "## ❌ Policy Violation Detected"
    elif label == 0:
        label_text = "## ✅ No Policy Violation"
    else:
        # Parse failure or unexpected label value.
        label_text = "## ⚠️ Unable to determine label"

    if categories:
        # Build with a parts list + join instead of repeated string +=.
        parts = ["### Categories:\n\n"]
        for cat in categories:
            parts.append(f"- **Category:** {cat.get('category', 'Unknown')}\n")
            parts.append(f"  - **Explanation:** {cat.get('reasoning', 'No reasoning provided')}\n")
            policy_source = cat.get('policy_source', '')
            if policy_source:
                parts.append(f"  - **Policy Source:** {policy_source}\n")
            parts.append("\n\n")
        cat_text = "".join(parts)
    else:
        cat_text = (
            "*No categories found in response*\n\n"
            "This output expects a valid JSON response, as specified for example in the default prompt.\n\n"
            "The raw response can be seen in the Model Response section below."
        )

    reasoning = result.get("reasoning", "")
    # Fence the raw content so Markdown renders it verbatim.
    raw_response_text = f"```\n{raw_content}\n```"
    return label_text, parsed, cat_text, reasoning or "", raw_response_text
def build_testing_tab() -> dict:
    """Build the testing tab UI and set up simple handlers.

    Constructs the Gradio "Testing" tab: an input column (test content,
    example loader, run button, save-mode radio) and a results column
    (label, categories, model response, reasoning trace accordions).
    Only the self-contained example-loading handler is wired here.

    Returns:
        Dict mapping component names to their Gradio components so that
        cross-tab handlers (wired elsewhere) can reference them.
    """
    with gr.Tab("🧪 Testing"):
        with gr.Row():
            # Left column: input, example loader, and save options.
            with gr.Column(scale=1):
                gr.Markdown("### Input")
                with gr.Group():
                    test_input = gr.Textbox(label="Test Content", placeholder="Enter content to test...", lines=5)
                    example_dropdown = gr.Dropdown(label="Load Example", choices=list(TEST_EXAMPLES.keys()), value=None)
                    load_example_btn = gr.Button("Load Example", variant="secondary")
                run_test_btn = gr.Button("Run Test", variant="primary")
                save_mode = gr.Radio(
                    label="Save to Dataset",
                    choices=["Don't Save", "Save to ROOST Dataset", "Save to Private Dataset"],
                    value="Don't Save"
                )
                # Initialize help text based on token availability.
                # NOTE(review): check_token_availability is called with None —
                # presumably "no OAuth profile, use env tokens"; confirm in
                # utils.helpers.
                has_personal, has_org = check_token_availability(None)
                save_mode_help = gr.Markdown(
                    value=format_save_mode_help(has_personal, has_org),
                    visible=True
                )
                # Initialize with default model info (first entry in MODELS).
                # NOTE(review): initial_model appears unused in this function —
                # confirm whether a cross-tab handler expects it, else drop it.
                initial_model = f"{MODELS[0]['name']} ({MODELS[0]['id']})"
                initial_info_lines = [
                    f"**Model:** {MODELS[0]['name']}",
                    f"- **Thinking Model:** {'Yes' if MODELS[0]['is_thinking'] else 'No'}",
                    f"- **Supports Reasoning Level:** {'Yes' if MODELS[0]['supports_reasoning_level'] else 'No'}",
                ]
                if MODELS[0]['supports_reasoning_level']:
                    initial_info_lines.append("- **Reasoning Effort:** Low")
                model_info_display = gr.Markdown(value="\n".join(initial_info_lines))
            # Right column: result displays, all updated by handlers wired
            # outside this function.
            with gr.Column(scale=2):
                gr.Markdown("### Results")
                label_display = gr.Markdown(value="*Run a test to see results*")
                with gr.Accordion("Categories & Reasoning", open=True):
                    categories_display = gr.Markdown(value="*No categories yet*")
                with gr.Accordion("Model Response", open=False):
                    model_response_display = gr.Markdown(value="*No response yet*")
                with gr.Accordion("Reasoning Trace", open=False):
                    # Hidden until a handler decides what to show (see
                    # format_reasoning_info).
                    reasoning_info = gr.Markdown(value="", visible=False)
                    reasoning_display = gr.Code(label="", language=None, value="", visible=False)
        # Simple handlers that don't need cross-tab coordination
        load_example_btn.click(
            lambda name: TEST_EXAMPLES.get(name, ""),
            inputs=example_dropdown,
            outputs=test_input,
        )
    # Expose components for cross-tab wiring done by the caller.
    return {
        "test_input": test_input,
        "example_dropdown": example_dropdown,
        "load_example_btn": load_example_btn,
        "run_test_btn": run_test_btn,
        "save_mode": save_mode,
        "save_mode_help": save_mode_help,
        "model_info_display": model_info_display,
        "label_display": label_display,
        "categories_display": categories_display,
        "model_response_display": model_response_display,
        "reasoning_info": reasoning_info,
        "reasoning_display": reasoning_display,
    }