"""Dataset utilities for saving and loading test results."""
from datetime import datetime
from datasets import Dataset, load_dataset
from huggingface_hub import HfApi
from utils.model_interface import extract_model_id, get_model_info


def get_username_from_token(token: str | None) -> str:
    """
    Get username from Hugging Face token using whoami.

    Args:
        token: HF token string or None

    Returns:
        Username string, or "yjernite" as fallback if token is None or whoami fails
    """
    if token is None:
        return "yjernite"
    try:
        api = HfApi()
        user_info = api.whoami(token=token)
        return user_info.get("name", "yjernite")
    except Exception:
        return "yjernite"


def get_dataset_repo_id(token: str | None) -> str:
    """
    Get dataset repository ID for the current user.

    Args:
        token: HF token string or None

    Returns:
        Dataset repo ID in the format "{username}/moderation-test-results"
    """
    username = get_username_from_token(token)
    return f"{username}/moderation-test-results"


def get_roost_dataset_repo_id() -> str:
    """Get ROOST org dataset repository ID."""
    return "roosttools/moderation-test-results"


def load_dataset_from_hub(repo_id: str, token: str | None) -> tuple[list[dict], Exception | None]:
    """
    Load a dataset from the Hub and return its list of examples.

    Args:
        repo_id: Dataset repository ID
        token: HF token string or None (None allows public dataset access)

    Returns:
        Tuple of (list of example dicts, error Exception or None if successful)
    """
    try:
        # load_dataset is the standard way to load from the Hub
        dataset_dict = load_dataset(repo_id, token=token)
        # Get the default split (usually 'train' or the first split)
        dataset = dataset_dict[list(dataset_dict.keys())[0]]
        # Convert to a list of dicts
        examples = dataset.to_list()
        return examples, None
    except FileNotFoundError:
        # Dataset doesn't exist yet
        return [], None
    except Exception as e:
        # Other errors (network, auth, etc.) - return the error
        return [], e
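
# Illustrative call pattern for the error-as-value convention above; the repo
# name here is hypothetical:
#
#   examples, err = load_dataset_from_hub("someuser/moderation-test-results", token=None)
#   if err is not None:
#       print(f"Could not load dataset: {err}")  # network, auth, etc.
#   elif not examples:
#       print("Dataset is empty or does not exist yet")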


def format_categories_and_reasoning(parsed: dict) -> str:
    """
    Format categories and reasoning from parsed JSON response.

    Args:
        parsed: Parsed JSON dict with 'categories' key

    Returns:
        Formatted markdown string
    """
    categories = parsed.get("categories", [])
    if not categories:
        return (
            "*No categories found in response*\n\n"
            "This output expects a valid JSON response, as specified for example in the default prompt.\n\n"
            "The raw response can be seen in the Model Response section below."
        )
    cat_text = "### Categories:\n\n"
    for cat in categories:
        category_name = cat.get("category", "Unknown")
        reasoning_text = cat.get("reasoning", "No reasoning provided")
        policy_source = cat.get("policy_source", "")
        cat_text += f"- **Category:** {category_name}\n"
        cat_text += f"  - **Explanation:** {reasoning_text}\n"
        if policy_source:
            cat_text += f"  - **Policy Source:** {policy_source}\n"
        cat_text += "\n\n"
    return cat_text


def save_to_dataset(repo_id: str, token: str | None, data: dict) -> tuple[bool, str]:
    """
    Save test result to Hugging Face dataset.

    Args:
        repo_id: Dataset repository ID (e.g., "username/moderation-test-results"
            or "roosttools/moderation-test-results")
        token: HF token string or None
        data: Dict with all test result fields

    Returns:
        Tuple of (success: bool, message: str)
    """
    try:
        # Load existing dataset and examples using shared function
        examples, load_error = load_dataset_from_hub(repo_id, token)
        # If there was an error loading (other than FileNotFoundError), raise it
        if load_error is not None:
            raise load_error
        # Append new example
        examples.append(data)
        # Create new dataset with all examples
        dataset = Dataset.from_list(examples)
        # Push to hub (private by default)
        dataset.push_to_hub(repo_id, token=token, private=True)
        return True, f"Saved to {repo_id}"
    except FileNotFoundError:
        # Dataset doesn't exist yet, create new one
        try:
            dataset = Dataset.from_list([data])
            dataset.push_to_hub(repo_id, token=token, private=True)
            return True, f"Saved to {repo_id}"
        except Exception as e:
            return False, f"Failed to create new dataset: {e}"
    except Exception as e:
        return False, f"Failed to save: {e}"


def load_dataset_examples(repo_id: str, token: str | None) -> tuple[list[dict], list[str]]:
    """
    Load examples from Hugging Face dataset.

    Args:
        repo_id: Dataset repository ID
        token: HF token string or None (None allows public dataset access)

    Returns:
        Tuple of (list of example dicts, list of formatted dropdown labels)
    """
    # Use shared loading function
    examples, load_error = load_dataset_from_hub(repo_id, token)
    # If there was an error loading, return empty lists
    if load_error is not None:
        return [], []
    if not examples:
        return [], []
    # Format dropdown labels
    labels = []
    for idx, example in enumerate(examples):
        input_text = example.get("input", "")
        model_selection = example.get("model_selection", "")
        policy_violation = example.get("policy_violation", -1)
        # Get label emoji: 1 = violation, 0 = no violation, anything else = unknown
        if policy_violation == 1:
            label_emoji = "❌"
        elif policy_violation == 0:
            label_emoji = "✅"
        else:
            label_emoji = "⚠️"
        # Extract model name
        model_id = extract_model_id(model_selection)
        model_info = get_model_info(model_id) if model_id else None
        model_name = model_info.get("name", model_id) if model_info else model_id or "Unknown"
        # Truncate input for label
        input_preview = input_text[:40] + "..." if len(input_text) > 40 else input_text
        label = f"{input_preview} - {label_emoji} - {model_name} - #{idx}"
        labels.append(label)
    return examples, labels
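

if __name__ == "__main__":
    # Minimal usage sketch, assuming an HF_TOKEN environment variable and a
    # hypothetical record; real records saved by the app carry the full set of
    # test-result fields, while this one only includes the keys that
    # load_dataset_examples reads back.
    import os

    token = os.environ.get("HF_TOKEN")
    repo_id = get_dataset_repo_id(token)
    ok, msg = save_to_dataset(
        repo_id,
        token,
        {
            "input": "example input text",
            "model_selection": "example-org/example-model",
            "policy_violation": 0,
        },
    )
    print(msg)
    examples, labels = load_dataset_examples(repo_id, token)
    print(f"Loaded {len(examples)} examples")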