llm_moderation_testing / ui /tab_dataset.py
Yacine Jernite
working with org token
bc0c2e4
raw
history blame
6.68 kB
"""Dataset tab UI components."""
import os
import sys
import gradio as gr
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.dataset import get_dataset_repo_id, get_roost_dataset_repo_id, load_dataset_examples
from utils.helpers import check_token_availability, get_org_token, get_personal_token
from utils.model_interface import extract_model_id, get_model_info
def format_preview_markdown(example: dict) -> str:
    """Format example data as a markdown preview.

    Args:
        example: Saved example record. The keys read are ``input``,
            ``model_selection``, ``policy_violation``,
            ``categories_and_reasoning`` and ``policy``; all are optional.
            Text fields that are missing or ``None`` render as empty text.

    Returns:
        Markdown with "Input", "Model", "Prediction" and "Policy Preview"
        sections, terminated by a newline.
    """
    # `or ""` also covers keys that are present but hold None, which would
    # otherwise be rendered as the literal text "None".
    input_text = example.get("input") or ""
    model_selection = example.get("model_selection") or ""
    policy_violation = example.get("policy_violation", -1)
    categories_and_reasoning = example.get("categories_and_reasoning") or ""
    policy = example.get("policy") or ""

    # Resolve a display name: registry name if known, else the raw id,
    # else "Unknown".
    model_id = extract_model_id(model_selection)
    model_info = get_model_info(model_id) if model_id else None
    if model_info:
        model_name = model_info.get("name", model_id)
    else:
        model_name = model_id or "Unknown"

    # Format label with emoji; anything other than 0/1 is treated as unknown.
    if policy_violation == 1:
        label_text = "❌ Policy Violation Detected"
    elif policy_violation == 0:
        label_text = "✅ No Policy Violation"
    else:
        label_text = "⚠️ Unable to determine label"

    # The policy is shown in full (no truncation is applied).
    policy_preview = policy

    # Blank lines around every section matter: a "---" line placed directly
    # under a text line is parsed as a setext heading underline (turning that
    # text into an H2) rather than as a horizontal rule.
    lines = [
        "## Example Preview",
        "",
        "### Input",
        "",
        f"{input_text}",
        "",
        "### Model",
        "",
        f"{model_name}",
        "",
        "---",
        "",
        "### Prediction",
        "",
        f"**{label_text}**",
        "",
        f"{categories_and_reasoning}",
        "",
        "---",
        "",
        "### Policy Preview",
        "",
        f"{policy_preview}",
        "",
    ]
    return "\n".join(lines)
def build_dataset_tab() -> dict:
    """Build the dataset tab UI.

    Creates the "Session Management & Examples" tab: two buttons that load
    either the personal dataset or the shared ROOST dataset, a help text
    describing token availability, a dropdown of loaded examples, a markdown
    preview of the selected example, and two state holders caching the loaded
    examples and their dropdown labels. Button clicks and dropdown changes are
    wired to the handlers defined inside this function.

    Returns:
        Dict of components keyed by ``example_dropdown``, ``cached_examples``,
        ``dropdown_choices_state``, ``refresh_private_btn``,
        ``refresh_roost_btn`` and ``dataset_help_text``.
    """
    with gr.Tab("📊 Session Management & Examples"):
        gr.Markdown(
            "Browse saved test results. Select an example to load it back into the app "
            "with all original settings for reproducibility."
        )

        # Check token availability for button states.
        # NOTE(review): called with None here, presumably because no OAuth
        # token exists yet at build time -- confirm against the helper.
        has_personal, has_org = check_token_availability(None)

        with gr.Row():
            refresh_private_btn = gr.Button(
                "Load Personal Dataset",
                variant="secondary",
                interactive=has_personal,
            )
            refresh_roost_btn = gr.Button(
                "Load shared ROOST Dataset",
                variant="secondary",
                interactive=True,  # Can load if public, even without token
            )

        # Help text explaining token requirements
        private_status = (
            "✅ Available"
            if has_personal
            else "❌ Requires personal token (OAuth login or .env)"
        )
        roost_status = (
            "✅ Available"
            if has_org
            else "⚠️ Can load if public, requires org token to save"
        )
        dataset_help_text = gr.Markdown(
            value=(
                f"*Private Dataset: {private_status}*\n"
                f"*ROOST Dataset: {roost_status}*"
            ),
            visible=True,
        )

        example_dropdown = gr.Dropdown(
            label="Select Example",
            choices=[],
            value=None,
            interactive=True,
        )
        preview_markdown = gr.Markdown(value="*Select an example to preview*")

        # Loaded examples and their dropdown labels, kept in parallel so the
        # selected label can be mapped back to its example by position.
        cached_examples = gr.State(value=[])
        dropdown_choices_state = gr.State(value=[])

        def refresh_dataset(dataset_type: str, oauth_token: gr.OAuthToken | None = None):
            """
            Refresh dataset based on type.

            Args:
                dataset_type: "private" or "roost"
                oauth_token: OAuth token from login

            Returns:
                Tuple of (dropdown update, preview markdown, examples, labels),
                matching the four outputs wired to the load buttons.
            """
            if dataset_type == "private":
                # Get personal token
                personal_token, _ = get_personal_token(oauth_token)
                if personal_token is None:
                    # Reset the selection together with the choices so that no
                    # stale value from a previous load is left in the dropdown.
                    return (
                        gr.update(choices=[], value=None),
                        "*Please log in or set personal token to browse private dataset*",
                        [],
                        [],
                    )
                repo_id = get_dataset_repo_id(personal_token)
                token = personal_token
            else:  # roost
                # Try org token first, but allow None for public datasets
                token = get_org_token()  # Can be None for public access
                repo_id = get_roost_dataset_repo_id()

            examples, labels = load_dataset_examples(repo_id, token)
            if not examples or not labels:
                return (
                    gr.update(choices=[], value=None),
                    "*No examples found in dataset*",
                    [],
                    [],
                )

            # Select and preview the first example by default.
            preview = format_preview_markdown(examples[0])
            return gr.update(choices=labels, value=labels[0]), preview, examples, labels

        def preview_example(selected_label, cached_examples_list, dropdown_choices):
            """Update preview when example is selected."""
            placeholder = "*Select an example to preview*"
            if not cached_examples_list or not selected_label or not dropdown_choices:
                return placeholder
            try:
                # Find index by matching label
                idx = dropdown_choices.index(selected_label)
            except ValueError:
                return placeholder
            # Guard against the label list being longer than the example list.
            if 0 <= idx < len(cached_examples_list):
                return format_preview_markdown(cached_examples_list[idx])
            return placeholder

        def refresh_private(oauth_token: gr.OAuthToken | None = None):
            """Refresh private dataset."""
            return refresh_dataset("private", oauth_token)

        def refresh_roost(oauth_token: gr.OAuthToken | None = None):
            """Refresh ROOST dataset."""
            return refresh_dataset("roost", oauth_token)

        # Both load buttons update the same four components.
        load_outputs = [
            example_dropdown,
            preview_markdown,
            cached_examples,
            dropdown_choices_state,
        ]

        refresh_private_btn.click(
            refresh_private,
            inputs=None,  # OAuth token auto-injected
            outputs=load_outputs,
        )
        refresh_roost_btn.click(
            refresh_roost,
            inputs=None,  # OAuth token auto-injected
            outputs=load_outputs,
        )
        example_dropdown.change(
            preview_example,
            inputs=[example_dropdown, cached_examples, dropdown_choices_state],
            outputs=preview_markdown,
        )

    return {
        "example_dropdown": example_dropdown,
        "cached_examples": cached_examples,
        "dropdown_choices_state": dropdown_choices_state,
        "refresh_private_btn": refresh_private_btn,
        "refresh_roost_btn": refresh_roost_btn,
        "dataset_help_text": dataset_help_text,
    }