Spaces:
Running
Running
File size: 6,676 Bytes
36b410a bc0c2e4 36b410a 0fea185 36b410a 0fea185 36b410a 0fea185 36b410a bc0c2e4 36b410a bc0c2e4 36b410a bc0c2e4 36b410a bc0c2e4 36b410a bc0c2e4 36b410a bc0c2e4 36b410a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
"""Dataset tab UI components."""
import os
import sys
import gradio as gr
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.dataset import get_dataset_repo_id, get_roost_dataset_repo_id, load_dataset_examples
from utils.helpers import check_token_availability, get_org_token, get_personal_token
from utils.model_interface import extract_model_id, get_model_info
def format_preview_markdown(example: dict) -> str:
    """Format example data as a Markdown preview.

    Args:
        example: Mapping describing one saved example. The keys read are
            ``input``, ``model_selection``, ``policy_violation``,
            ``categories_and_reasoning`` and ``policy``. Missing keys fall
            back to an empty string (``-1`` for ``policy_violation``).

    Returns:
        A Markdown document with Input, Model, Prediction and Policy Preview
        sections, terminated by a newline.
    """
    input_text = example.get("input", "")
    model_selection = example.get("model_selection", "")
    policy_violation = example.get("policy_violation", -1)
    categories_and_reasoning = example.get("categories_and_reasoning", "")
    policy = example.get("policy", "")

    # Resolve a display name: prefer the registry's "name", then the raw
    # model id, and finally "Unknown" when no id could be extracted.
    model_id = extract_model_id(model_selection)
    model_info = get_model_info(model_id) if model_id else None
    if model_info:
        model_name = model_info.get("name", model_id)
    else:
        model_name = model_id or "Unknown"

    # NOTE(review): the emoji below were reconstructed from mis-decoded
    # bytes in the reviewed copy of this file; confirm the exact glyphs
    # against the upstream source.
    if policy_violation == 1:
        label_text = "❌ Policy Violation Detected"
    elif policy_violation == 0:
        label_text = "✅ No Policy Violation"
    else:
        label_text = "⚠️ Unable to determine label"

    # The full policy text is shown; no truncation is applied.
    policy_preview = policy

    # Sections are separated by blank lines. Without them, a "---" placed
    # directly under a text line is parsed as a setext heading underline
    # (turning that text into an H2) instead of a horizontal rule.
    sections = [
        "## Example Preview",
        "### Input",
        f"{input_text}",
        "### Model",
        f"{model_name}",
        "---",
        "### Prediction",
        f"**{label_text}**",
        f"{categories_and_reasoning}",
        "---",
        "### Policy Preview",
        f"{policy_preview}",
    ]
    return "\n\n".join(sections) + "\n"
def build_dataset_tab() -> dict:
    """Build the dataset tab UI and wire up its event handlers.

    Returns:
        Dict of created components keyed by name: ``example_dropdown``,
        ``cached_examples``, ``dropdown_choices_state``,
        ``refresh_private_btn``, ``refresh_roost_btn`` and
        ``dataset_help_text``.
    """
    # Placeholder shown whenever no example can be previewed.
    empty_preview = "*Select an example to preview*"

    # NOTE(review): the leading "π" in the tab label looks like a mis-decoded
    # emoji; the original glyph cannot be recovered from the reviewed copy,
    # so the label is left untouched. Confirm against the upstream source.
    with gr.Tab("π Session Management & Examples"):
        gr.Markdown(
            "Browse saved test results. Select an example to load it back into the app "
            "with all original settings for reproducibility."
        )

        # Check token availability for button states
        has_personal, has_org = check_token_availability(None)

        with gr.Row():
            refresh_private_btn = gr.Button(
                "Load Personal Dataset",
                variant="secondary",
                interactive=has_personal,
            )
            refresh_roost_btn = gr.Button(
                "Load shared ROOST Dataset",
                variant="secondary",
                interactive=True,  # Can load if public, even without token
            )

        # Help text explaining token requirements.
        # NOTE(review): emoji reconstructed from mis-decoded bytes; confirm
        # the exact glyphs against the upstream source.
        private_status = (
            "✅ Available"
            if has_personal
            else "❌ Requires personal token (OAuth login or .env)"
        )
        roost_status = (
            "✅ Available"
            if has_org
            else "⚠️ Can load if public, requires org token to save"
        )
        dataset_help_text = gr.Markdown(
            value=(
                f"*Private Dataset: {private_status}*\n"
                f"*ROOST Dataset: {roost_status}*"
            ),
            visible=True,
        )

        example_dropdown = gr.Dropdown(
            label="Select Example",
            choices=[],
            value=None,
            interactive=True,
        )
        preview_markdown = gr.Markdown(value=empty_preview)

        # Per-session state: the loaded examples and the dropdown labels.
        # preview_example maps a selected label to an example by the label's
        # position in the choices list.
        cached_examples = gr.State(value=[])
        dropdown_choices_state = gr.State(value=[])

        def refresh_dataset(dataset_type: str, oauth_token: gr.OAuthToken | None = None):
            """Refresh dataset based on type.

            Args:
                dataset_type: "private" or "roost"
                oauth_token: OAuth token from login

            Returns:
                A 4-tuple matching the click handlers' outputs: dropdown
                update, preview markdown, list of examples, list of labels.
            """
            if dataset_type == "private":
                # Get personal token
                personal_token, _ = get_personal_token(oauth_token)
                if personal_token is None:
                    # Clear the selection as well as the choices so a stale
                    # value is not left behind (same as the empty-dataset
                    # path below).
                    return (
                        gr.update(choices=[], value=None),
                        "*Please log in or set personal token to browse private dataset*",
                        [],
                        [],
                    )
                repo_id = get_dataset_repo_id(personal_token)
                token = personal_token
            else:  # roost
                # Try org token first, but allow None for public datasets
                org_token = get_org_token()
                repo_id = get_roost_dataset_repo_id()
                token = org_token  # Can be None for public access

            examples, labels = load_dataset_examples(repo_id, token)
            if not examples or not labels:
                return gr.update(choices=[], value=None), "*No examples found in dataset*", [], []

            # Select and preview the first example by default.
            preview = format_preview_markdown(examples[0])
            return gr.update(choices=labels, value=labels[0]), preview, examples, labels

        def preview_example(selected_label, cached_examples_list, dropdown_choices):
            """Update preview when example is selected."""
            if not cached_examples_list or not selected_label or not dropdown_choices:
                return empty_preview
            try:
                # Find index by matching label
                idx = dropdown_choices.index(selected_label)
                if 0 <= idx < len(cached_examples_list):
                    return format_preview_markdown(cached_examples_list[idx])
            except (ValueError, IndexError):
                # Label not among the current choices: fall through to the
                # placeholder rather than surfacing an error in the UI.
                pass
            return empty_preview

        def refresh_private(oauth_token: gr.OAuthToken | None = None):
            """Refresh private dataset."""
            return refresh_dataset("private", oauth_token)

        def refresh_roost(oauth_token: gr.OAuthToken | None = None):
            """Refresh ROOST dataset."""
            return refresh_dataset("roost", oauth_token)

        refresh_private_btn.click(
            refresh_private,
            inputs=None,  # OAuth token auto-injected
            outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state],
        )
        refresh_roost_btn.click(
            refresh_roost,
            inputs=None,  # OAuth token auto-injected
            outputs=[example_dropdown, preview_markdown, cached_examples, dropdown_choices_state],
        )
        example_dropdown.change(
            preview_example,
            inputs=[example_dropdown, cached_examples, dropdown_choices_state],
            outputs=preview_markdown,
        )

    return {
        "example_dropdown": example_dropdown,
        "cached_examples": cached_examples,
        "dropdown_choices_state": dropdown_choices_state,
        "refresh_private_btn": refresh_private_btn,
        "refresh_roost_btn": refresh_roost_btn,
        "dataset_help_text": dataset_help_text,
    }
|