"""Dataset tab UI components.""" import os import sys import gradio as gr sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from utils.dataset import get_dataset_repo_id, get_roost_dataset_repo_id, load_dataset_examples from utils.helpers import check_token_availability, get_org_token, get_personal_token from utils.model_interface import extract_model_id, get_model_info def format_preview_markdown(example: dict) -> str: """Format example data as markdown preview.""" input_text = example.get("input", "") model_selection = example.get("model_selection", "") policy_violation = example.get("policy_violation", -1) categories_and_reasoning = example.get("categories_and_reasoning", "") policy = example.get("policy", "") # Extract model name model_id = extract_model_id(model_selection) model_info = get_model_info(model_id) if model_id else None model_name = model_info.get("name", model_id) if model_info else model_id or "Unknown" # Format label with emoji if policy_violation == 1: label_text = "❌ Policy Violation Detected" elif policy_violation == 0: label_text = "✅ No Policy Violation" else: label_text = "⚠️ Unable to determine label" # Truncate policy preview policy_preview = policy # [:512] + "..." if len(policy) > 512 else policy markdown = f"""## Example Preview ### Input {input_text} ### Model {model_name} --- ### Prediction **{label_text}** {categories_and_reasoning} --- ### Policy Preview {policy_preview} """ return markdown def build_dataset_tab() -> dict: """Build the dataset tab UI.""" with gr.Tab("📊 Session Management & Examples"): gr.Markdown( "Browse saved test results. Select an example to load it back into the app " "with all original settings for reproducibility." ) # Check token availability for button states has_personal, has_org = check_token_availability(None) with gr.Row(): refresh_private_btn = gr.Button( "Load Personal Dataset", variant="secondary", interactive=has_personal ) refresh_roost_btn = gr.Button( "Load shared ROOST Dataset", variant="secondary", interactive=True # Can load if public, even without token ) # Help text explaining token requirements dataset_help_text = gr.Markdown( value=( f"*Private Dataset: {'✅ Available' if has_personal else '❌ Requires personal token (OAuth login or .env)'}*\n" f"*ROOST Dataset: {'✅ Available' if has_org else '⚠️ Can load if public, requires org token to save'}*" ), visible=True ) example_dropdown = gr.Dropdown( label="Select Example", choices=[], value=None, interactive=True ) preview_markdown = gr.Markdown(value="*Select an example to preview*") cached_examples = gr.State(value=[]) dropdown_choices_state = gr.State(value=[]) def refresh_dataset(dataset_type: str, oauth_token: gr.OAuthToken | None = None): """ Refresh dataset based on type. Args: dataset_type: "private" or "roost" oauth_token: OAuth token from login """ if dataset_type == "private": # Get personal token personal_token, _ = get_personal_token(oauth_token) if personal_token is None: return gr.update(choices=[]), "*Please log in or set personal token to browse private dataset*", [], [] repo_id = get_dataset_repo_id(personal_token) token = personal_token else: # roost # Try org token first, but allow None for public datasets org_token = get_org_token() repo_id = get_roost_dataset_repo_id() token = org_token # Can be None for public access examples, labels = load_dataset_examples(repo_id, token) if not examples or not labels: return gr.update(choices=[], value=None), "*No examples found in dataset*", [], [] preview = format_preview_markdown(examples[0]) return gr.update(choices=labels, value=labels[0]), preview, examples, labels def preview_example(selected_label, cached_examples_list, dropdown_choices): """Update preview when example is selected.""" if not cached_examples_list or not selected_label or not dropdown_choices: return "*Select an example to preview*" try: # Find index by matching label idx = dropdown_choices.index(selected_label) if 0 <= idx < len(cached_examples_list): return format_preview_markdown(cached_examples_list[idx]) except (ValueError, IndexError): pass return "*Select an example to preview*" def refresh_private(oauth_token: gr.OAuthToken | None = None): """Refresh private dataset.""" return refresh_dataset("private", oauth_token) def refresh_roost(oauth_token: gr.OAuthToken | None = None): """Refresh ROOST dataset.""" return refresh_dataset("roost", oauth_token) refresh_private_btn.click( refresh_private, inputs=None, # OAuth token auto-injected outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state] ) refresh_roost_btn.click( refresh_roost, inputs=None, # OAuth token auto-injected outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state] ) example_dropdown.change( preview_example, inputs=[example_dropdown, cached_examples, dropdown_choices_state], outputs=preview_markdown ) return { "example_dropdown": example_dropdown, "cached_examples": cached_examples, "dropdown_choices_state": dropdown_choices_state, "refresh_private_btn": refresh_private_btn, "refresh_roost_btn": refresh_roost_btn, "dataset_help_text": dataset_help_text, }