| """Dataset utilities for saving and loading test results.""" | |
| from datetime import datetime | |
| from datasets import Dataset, load_dataset | |
| from huggingface_hub import HfApi | |
| from utils.model_interface import extract_model_id, get_model_info | |


def get_username_from_token(token: str | None) -> str:
    """
    Get username from Hugging Face token using whoami.

    Args:
        token: HF token string or None

    Returns:
        Username string, or "yjernite" as fallback if token is None or whoami fails
    """
    if token is None:
        return "yjernite"
    try:
        api = HfApi()
        user_info = api.whoami(token=token)
        return user_info.get("name", "yjernite")
    except Exception:
        return "yjernite"


def get_dataset_repo_id(token: str | None) -> str:
    """
    Get dataset repository ID for the current user.

    Args:
        token: HF token string or None

    Returns:
        Dataset repo ID in format "{username}/moderation-test-results"
    """
    username = get_username_from_token(token)
    return f"{username}/moderation-test-results"


def get_roost_dataset_repo_id() -> str:
    """Get ROOST org dataset repository ID."""
    return "roosttools/moderation-test-results"


def load_dataset_from_hub(repo_id: str, token: str | None) -> tuple[list[dict], Exception | None]:
    """
    Load dataset from Hub and return list of examples.

    Args:
        repo_id: Dataset repository ID
        token: HF token string or None (None allows public dataset access)

    Returns:
        Tuple of (list of example dicts, error Exception or None if successful)
    """
    try:
        # Use load_dataset - the standard way to load from the Hub
        dataset_dict = load_dataset(repo_id, token=token)
        # Get the default split (usually 'train' or the first split)
        dataset = dataset_dict[list(dataset_dict.keys())[0]]
        # Convert to a list of dicts
        examples = dataset.to_list()
        return examples, None
    except FileNotFoundError:
        # Dataset doesn't exist yet
        return [], None
    except Exception as e:
        # Other errors (network, auth, etc.) - return the error
        return [], e
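
# Example usage (a sketch; the repo ID below is hypothetical, and the token would
# normally come from the app's login flow or an environment variable):
#
#   examples, err = load_dataset_from_hub("my-user/moderation-test-results", token=None)
#   if err is not None:
#       print(f"Could not load dataset: {err}")
#   else:
#       print(f"Loaded {len(examples)} saved test results")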


def format_categories_and_reasoning(parsed: dict) -> str:
    """
    Format categories and reasoning from parsed JSON response.

    Args:
        parsed: Parsed JSON dict with 'categories' key

    Returns:
        Formatted markdown string
    """
    categories = parsed.get("categories", [])
    if categories:
        cat_text = "### Categories:\n\n"
        for cat in categories:
            category_name = cat.get("category", "Unknown")
            reasoning_text = cat.get("reasoning", "No reasoning provided")
            policy_source = cat.get("policy_source", "")
            cat_text += f"- **Category:** {category_name}\n"
            cat_text += f"  - **Explanation:** {reasoning_text}\n"
            if policy_source:
                cat_text += f"  - **Policy Source:** {policy_source}\n"
            cat_text += "\n\n"
        return cat_text
    else:
        return (
            "*No categories found in response*\n\n"
            "This output expects a valid JSON response, as specified for example in the default prompt.\n\n"
            "The raw response can be seen in the Model Response section below."
        )


def save_to_dataset(repo_id: str, token: str | None, data: dict) -> tuple[bool, str]:
    """
    Save test result to Hugging Face dataset.

    Args:
        repo_id: Dataset repository ID (e.g., "username/moderation-test-results" or "roosttools/moderation-test-results")
        token: HF token string or None
        data: Dict with all test result fields

    Returns:
        Tuple of (success: bool, message: str)
    """
    try:
        # Load existing dataset and examples using the shared function
        examples, load_error = load_dataset_from_hub(repo_id, token)
        # If there was an error loading (other than FileNotFoundError), raise it
        if load_error is not None:
            raise load_error
        # Append the new example
        examples.append(data)
        # Create a new dataset with all examples
        dataset = Dataset.from_list(examples)
        # Push to the Hub (private by default)
        dataset.push_to_hub(repo_id, token=token, private=True)
        return True, f"Saved to {repo_id}"
    except FileNotFoundError:
        # Dataset doesn't exist yet, create a new one
        try:
            dataset = Dataset.from_list([data])
            dataset.push_to_hub(repo_id, token=token, private=True)
            return True, f"Saved to {repo_id}"
        except Exception as e:
            return False, f"Failed to create new dataset: {str(e)}"
    except Exception as e:
        return False, f"Failed to save: {str(e)}"


def load_dataset_examples(repo_id: str, token: str | None) -> tuple[list[dict], list[str]]:
    """
    Load examples from Hugging Face dataset.

    Args:
        repo_id: Dataset repository ID
        token: HF token string or None (None allows public dataset access)

    Returns:
        Tuple of (list of example dicts, list of formatted dropdown labels)
    """
    # Use the shared loading function
    examples, load_error = load_dataset_from_hub(repo_id, token)

    # If there was an error loading, return empty lists
    if load_error is not None:
        return [], []

    if not examples:
        return [], []

    # Format dropdown labels
    labels = []
    for idx, example in enumerate(examples):
        input_text = example.get("input", "")
        model_selection = example.get("model_selection", "")
        policy_violation = example.get("policy_violation", -1)

        # Get label emoji
        if policy_violation == 1:
            label_emoji = "❌"
        elif policy_violation == 0:
            label_emoji = "✅"
        else:
            label_emoji = "⚠️"

        # Extract model name
        model_id = extract_model_id(model_selection)
        model_info = get_model_info(model_id) if model_id else None
        model_name = model_info.get("name", model_id) if model_info else model_id or "Unknown"

        # Truncate input for label
        input_preview = input_text[:40] + "..." if len(input_text) > 40 else input_text
        label = f"{input_preview} - {label_emoji} - {model_name} - #{idx}"
        labels.append(label)

    return examples, labels
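

if __name__ == "__main__":
    # Minimal smoke test (a sketch, not part of the app flow): list saved results from
    # the shared ROOST dataset. If the dataset is private or unreachable without a
    # token, the loader returns empty lists, so this simply prints "Loaded 0 examples".
    demo_examples, demo_labels = load_dataset_examples(get_roost_dataset_repo_id(), token=None)
    print(f"Loaded {len(demo_examples)} examples")
    for demo_label in demo_labels[:5]:
        print(demo_label)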