| """ | |
| Gradio app for Polish Twitter Emotion Classifier. | |
| This application provides an interactive interface for predicting emotions | |
| and sentiment in Polish text using a fine-tuned RoBERTa model. | |
| Environment Variables: | |
| HF_TOKEN: HuggingFace authentication token (required for private models and auto-logging) | |
| export HF_TOKEN=your_huggingface_token | |
| HF_DATASET_REPO: HuggingFace dataset name for storing predictions (optional) | |
| export HF_DATASET_REPO=your-username/predictions-dataset | |
| Default: "twitter-emotion-pl-feedback" | |
| Features: | |
| - Multi-label emotion and sentiment classification | |
| - Calibrated predictions with temperature scaling | |
| - Automatic prediction logging to HuggingFace datasets | |
| - Persistent data storage across space restarts | |
| """ | |
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import numpy as np
import json
import os
import re
import spaces
from datetime import datetime
from datasets import Dataset
from huggingface_hub import HfApi, hf_hub_download, list_repo_files
import pandas as pd
import tempfile

# Model configuration
MODEL_NAME = "yazoniak/twitter-emotion-pl-classifier"
MAX_LENGTH = 8192
DEFAULT_THRESHOLD = 0.5

# Authentication token for private models
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Flagging configuration - dataset for storing user feedback
# Set this to your desired dataset name, e.g. "your-username/model-feedback"
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "twitter-emotion-pl-feedback")

# Emotion emojis for visual display
LABEL_EMOJIS = {
    "radość": "😊",
    "wstręt": "🤢",
    "gniew": "😠",
    "przeczuwanie": "🤔",
    "pozytywny": "👍",
    "negatywny": "👎",
    "neutralny": "😐",
    "sarkazm": "😏",
}
class HFDatasetLogger:
    """
    Custom logger that saves predictions to a HuggingFace dataset.

    This provides persistent storage across space restarts by storing data
    directly to a HuggingFace dataset repository.

    Uses direct parquet file download via hf_hub_download to bypass
    any caching issues with load_dataset.
    """

    def __init__(self, dataset_name: str, hf_token: str, private: bool = True):
        """
        Initialize the HuggingFace dataset logger.

        Args:
            dataset_name: Name of the dataset (e.g., "username/dataset-name")
            hf_token: HuggingFace authentication token
            private: Whether to create a private dataset
        """
        self.hf_token = hf_token
        self.private = private
        self.api = HfApi()
        self.dataset_exists = False
        self.parquet_filename = None

        # If dataset_name doesn't have a username prefix, get it from the token
        if "/" not in dataset_name:
            try:
                user_info = self.api.whoami(token=hf_token)
                username = user_info["name"]
                self.dataset_name = f"{username}/{dataset_name}"
                print(f" Resolved dataset name: {self.dataset_name}")
            except Exception as e:
                print(f" Could not get username from token: {e}")
                self.dataset_name = dataset_name
        else:
            self.dataset_name = dataset_name

        # Check if dataset exists by listing files in the repo
        try:
            files = list_repo_files(
                self.dataset_name,
                repo_type="dataset",
                token=hf_token,
            )
            files_list = list(files)  # Convert to list to allow multiple iterations
            print(f" Files in repo: {files_list}")
            # Find the parquet file(s)
            parquet_files = [f for f in files_list if f.endswith(".parquet")]
            if parquet_files:
                # Use the first parquet file (could be at root or in data/ folder)
                self.parquet_filename = parquet_files[0]
                self.dataset_exists = True
                print(f" ✓ Found existing parquet file: {self.parquet_filename}")
            else:
                print(f" No parquet files found in dataset repo (files: {files_list})")
        except Exception as e:
            print(f" Dataset repo not found or error: {type(e).__name__}: {e}")
            self.dataset_exists = False

    def _download_existing_data(self) -> pd.DataFrame | None:
        """
        Download existing parquet data directly using hf_hub_download.

        Uses force_download=True to bypass all caching.

        Returns:
            DataFrame with existing data, or None if download fails
        """
        if not self.parquet_filename:
            print(" No parquet filename set, cannot download")
            return None

        try:
            print(f" Downloading parquet file: {self.parquet_filename}")
            # Create a unique temp directory for each download to avoid caching
            with tempfile.TemporaryDirectory() as tmp_dir:
                local_path = hf_hub_download(
                    repo_id=self.dataset_name,
                    filename=self.parquet_filename,
                    repo_type="dataset",
                    token=self.hf_token,
                    force_download=True,  # Force fresh download, bypass cache
                    local_dir=tmp_dir,
                )
                print(f" Downloaded to: {local_path}")
                df = pd.read_parquet(local_path)
                print(f" ✓ Loaded existing data: {len(df)} rows")
                return df
        except Exception as e:
            print(f" ✗ Error downloading existing data: {type(e).__name__}: {e}")
            import traceback

            traceback.print_exc()
            return None

    def log(
        self,
        text: str,
        mode: str,
        threshold: float,
        anonymize: bool,
        predictions: str,
        json_output: str,
    ) -> None:
        """
        Log a prediction to the HuggingFace dataset.

        Downloads the existing parquet directly (bypassing the load_dataset cache),
        appends the new row, and pushes the combined data back to the Hub.

        Args:
            text: Input text
            mode: Prediction mode
            threshold: Threshold value
            anonymize: Anonymization setting
            predictions: Prediction output (markdown)
            json_output: JSON output with scores
        """
        try:
            # Prepare new data entry as DataFrame
            new_row = pd.DataFrame([{
                "timestamp": datetime.utcnow().isoformat(),
                "text": text,
                "mode": mode,
                "threshold": float(threshold),
                "anonymize": bool(anonymize),
                "predictions": predictions,
                "json_output": json_output,
            }])

            if self.dataset_exists:
                # Download existing data directly from parquet file
                existing_df = self._download_existing_data()
                if existing_df is not None and len(existing_df) > 0:
                    # Concatenate DataFrames
                    combined_df = pd.concat([existing_df, new_row], ignore_index=True)
                    print(f" Combining {len(existing_df)} existing + 1 new = {len(combined_df)} rows")
                else:
                    # No existing data or download failed, use just the new row
                    combined_df = new_row
                    print(" No existing data found, starting fresh")

                # Convert to Dataset and push
                combined_dataset = Dataset.from_pandas(combined_df)
                combined_dataset.push_to_hub(
                    self.dataset_name,
                    token=self.hf_token,
                    private=self.private,
                    commit_message=f"Add prediction at {datetime.utcnow().isoformat()}",
                )
                print(f"✓ Pushed dataset with {len(combined_df)} total rows")
                # Update parquet filename if this was the first push
                if not self.parquet_filename:
                    self.parquet_filename = "data/train-00000-of-00001.parquet"
            else:
                # Create new dataset
                new_dataset = Dataset.from_pandas(new_row)
                new_dataset.push_to_hub(
                    self.dataset_name,
                    token=self.hf_token,
                    private=self.private,
                )
                self.dataset_exists = True
                self.parquet_filename = "data/train-00000-of-00001.parquet"
                print("✓ Created new dataset with first prediction")
        except Exception as e:
            print(f"⚠ Error logging to HuggingFace dataset: {e}")
            import traceback

            traceback.print_exc()
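
# Each row pushed by HFDatasetLogger.log() has the columns: timestamp (UTC ISO-8601 string),
# text, mode, threshold (float), anonymize (bool), predictions (markdown), json_output (JSON string).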

def preprocess_text(text: str, anonymize_mentions: bool = True) -> str:
    """
    Preprocess input text by anonymizing mentions.

    Args:
        text: Input text to preprocess
        anonymize_mentions: Whether to replace @mentions with @anonymized_account

    Returns:
        Preprocessed text
    """
    if anonymize_mentions:
        text = re.sub(r"@\w+", "@anonymized_account", text)
    return text
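
# For illustration: with anonymization on, a text such as
# "@jan_kowalski świetny mecz!" becomes "@anonymized_account świetny mecz!"
# (the handle above is a made-up example, not taken from any dataset).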

def load_model():
    """
    Load the model, tokenizer, and calibration artifacts.

    For private models, requires the HF_TOKEN environment variable to be set.

    Returns:
        tuple: (model, tokenizer, labels, calibration_artifacts)
    """
    print(f"Loading model: {MODEL_NAME}")
    if HF_TOKEN:
        print(f"Using authentication token for model: {MODEL_NAME}")
        model = AutoModelForSequenceClassification.from_pretrained(
            MODEL_NAME, token=HF_TOKEN
        )
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
    else:
        print(f"Loading public model: {MODEL_NAME}")
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model.eval()

    # Get label mappings from model config
    labels = [model.config.id2label[i] for i in range(model.config.num_labels)]

    # Try to load calibration artifacts
    calibration_artifacts = None
    try:
        # Try to download from HF Hub
        from huggingface_hub import hf_hub_download

        calib_path = hf_hub_download(
            repo_id=MODEL_NAME, filename="calibration_artifacts.json", token=HF_TOKEN
        )
        with open(calib_path, "r") as f:
            calibration_artifacts = json.load(f)
        print("✓ Calibration artifacts loaded")
    except Exception as e:
        print(f"⚠ Could not load calibration artifacts: {e}")
        print(" Calibrated mode will not be available")
    return model, tokenizer, labels, calibration_artifacts
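
# calibration_artifacts.json is expected to map each label to a scalar, roughly:
#   {"temperatures": {"radość": 1.2, ...}, "optimal_thresholds": {"radość": 0.4, ...}}
# (the numbers above are illustrative only); predict_emotions() reads exactly these two keys.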

# Load model at startup
print("Loading model...")
model, tokenizer, labels, calibration_artifacts = load_model()
print(f"✓ Model loaded successfully with {len(labels)} labels")
print(f" Labels: {', '.join(labels)}")

# Initialize custom HuggingFace dataset logger for automatic prediction logging
hf_logger = None
if HF_TOKEN:
    try:
        hf_logger = HFDatasetLogger(
            dataset_name=HF_DATASET_REPO,
            hf_token=HF_TOKEN,
            private=True,
        )
        print(f"✓ Auto-logging enabled - all predictions will be saved to: {HF_DATASET_REPO}")
        if hf_logger.dataset_exists:
            print(" Dataset found - will append new predictions")
        else:
            print(" Dataset will be created on first prediction")
    except Exception as e:
        print(f"⚠ Could not initialize auto-logging: {e}")
        print(" Predictions will not be logged")
else:
    print("⚠ HF_TOKEN not set - auto-logging disabled")

def predict_emotions(
    text: str,
    mode: str = "Calibrated",
    threshold: float = DEFAULT_THRESHOLD,
    anonymize: bool = True,
) -> tuple[str, str]:
    """
    Predict emotions and sentiment for Polish text.

    Automatically logs every prediction to the HuggingFace dataset if logging is enabled.

    Args:
        text: Input Polish text
        mode: Prediction mode ("Calibrated" or "Default")
        threshold: Classification threshold (0-1) - used only in Default mode
        anonymize: Whether to anonymize @mentions

    Returns:
        tuple: (formatted_predictions, all_scores_json)
    """
    # Validate inputs
    if not text or not text.strip():
        return "⚠️ Please enter some text to analyze", ""

    # Preprocess text
    processed_text = preprocess_text(text, anonymize_mentions=anonymize)
    text_changed = processed_text != text

    # Validate mode
    if mode == "Calibrated" and calibration_artifacts is None:
        return (
            "⚠️ Calibrated mode not available (calibration artifacts not found). Please use Default mode.",
            "",
        )

    # Validate threshold for Default mode
    if mode == "Default" and (threshold < 0 or threshold > 1):
        return "⚠️ Threshold must be between 0 and 1", ""

    # Tokenize
    inputs = tokenizer(
        processed_text, return_tensors="pt", truncation=True, max_length=MAX_LENGTH
    )

    # Make prediction
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits.squeeze().numpy()

    # Calculate probabilities based on mode
    if mode == "Calibrated":
        temperatures = calibration_artifacts["temperatures"]
        optimal_thresholds = calibration_artifacts["optimal_thresholds"]
        probabilities = []
        predictions = []
        used_thresholds = []
        for i, label in enumerate(labels):
            temp = temperatures[label]
            thresh = optimal_thresholds[label]
            calibrated_logit = logits[i] / temp
            prob = 1 / (1 + np.exp(-calibrated_logit))
            probabilities.append(prob)
            predictions.append(prob > thresh)
            used_thresholds.append(thresh)
        probabilities = np.array(probabilities)
    else:  # Default mode
        probabilities = 1 / (1 + np.exp(-logits))
        predictions = probabilities > threshold
        used_thresholds = [threshold] * len(labels)

    # Get assigned labels
    assigned_labels = [labels[i] for i in range(len(labels)) if predictions[i]]

    # Format output - Start with detected labels prominently
    result_text = "# Detected Labels\n\n"

    # Assigned labels section
    if assigned_labels:
        for label in assigned_labels:
            emoji = LABEL_EMOJIS.get(label, "🏷️")
            idx = labels.index(label)
            result_text += f"## {emoji} **{label}** `{probabilities[idx]:.1%}`\n\n"
    else:
        result_text += "## No Labels Detected\n\n"
        result_text += "All confidence scores are below the threshold(s).\n\n"
    result_text += "---\n\n"

    # Categorize labels
    emotions = ["radość", "wstręt", "gniew", "przeczuwanie"]
    sentiments = ["pozytywny", "negatywny", "neutralny"]
    special = ["sarkazm"]

    # Additional details - Less prominent
    result_text += "<details>\n"
    result_text += "<summary><b>📊 All Scores (click to expand)</b></summary>\n\n"
    if text_changed and anonymize:
        result_text += f"**Preprocessed text:** _{processed_text}_\n\n"
        result_text += f"**Original text:** {text}\n\n"
    result_text += f"**Mode:** {mode}"
    if mode == "Default":
        result_text += f" (threshold: {threshold:.2f})"
    result_text += "\n\n"

    # Emotions
    result_text += "**Emotions:**\n\n"
    for label in emotions:
        if label in labels:
            idx = labels.index(label)
            emoji = LABEL_EMOJIS.get(label, "🏷️")
            status = "✓" if predictions[idx] else "·"
            thresh_info = (
                f" (threshold: {used_thresholds[idx]:.2f})"
                if mode == "Calibrated"
                else ""
            )
            result_text += f"{status} {emoji} {label:15s}: {probabilities[idx]:.4f}{thresh_info}\n\n"

    # Sentiment
    result_text += "**Sentiment:**\n\n"
    for label in sentiments:
        if label in labels:
            idx = labels.index(label)
            emoji = LABEL_EMOJIS.get(label, "🏷️")
            status = "✓" if predictions[idx] else "·"
            thresh_info = (
                f" (threshold: {used_thresholds[idx]:.2f})"
                if mode == "Calibrated"
                else ""
            )
            result_text += f"{status} {emoji} {label:15s}: {probabilities[idx]:.4f}{thresh_info}\n\n"

    # Special
    result_text += "**Special:**\n\n"
    for label in special:
        if label in labels:
            idx = labels.index(label)
            emoji = LABEL_EMOJIS.get(label, "🏷️")
            status = "✓" if predictions[idx] else "·"
            thresh_info = (
                f" (threshold: {used_thresholds[idx]:.2f})"
                if mode == "Calibrated"
                else ""
            )
            result_text += f"{status} {emoji} {label:15s}: {probabilities[idx]:.4f}{thresh_info}\n\n"
    result_text += "</details>"

    # Create JSON output
    all_scores = {label: float(probabilities[i]) for i, label in enumerate(labels)}
    json_output = {
        "assigned_labels": assigned_labels,
        "all_scores": all_scores,
        "mode": mode,
        "text_length": len(text),
        "preprocessed": text_changed,
    }
    if mode == "Calibrated":
        json_output["temperatures"] = calibration_artifacts["temperatures"]
        json_output["optimal_thresholds"] = calibration_artifacts["optimal_thresholds"]
    else:
        json_output["threshold"] = threshold
    all_scores_json = json.dumps(json_output, indent=2, ensure_ascii=False)

    # Automatically log all predictions if logging is enabled
    if hf_logger:
        try:
            hf_logger.log(
                text=text,
                mode=mode,
                threshold=threshold,
                anonymize=anonymize,
                predictions=result_text,
                json_output=all_scores_json,
            )
        except Exception as e:
            print(f"⚠ Error logging prediction: {e}")

    return result_text, all_scores_json
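
# Scoring summary for predict_emotions: Calibrated mode computes sigmoid(logit / temperature[label])
# and compares it to optimal_thresholds[label]; Default mode computes sigmoid(logit) and compares
# every label against the single slider threshold.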

# Example inputs
examples = [
    ["@zgp_intervillage Uwielbiam czekać na peronie 3 godziny! Gratulacje dla #zgp"],
]

# Create Gradio interface
with gr.Blocks(
    title="Polish Twitter Emotion Classifier", theme=gr.themes.Soft()
) as demo:
    gr.Markdown("""
# 🎭 Polish Twitter Emotion Classifier

This **[model](https://huggingface.co/yazoniak/twitter-emotion-pl-classifier)** predicts emotions and sentiment in Polish text using a fine-tuned **[PKOBP/polish-roberta-8k](https://huggingface.co/PKOBP/polish-roberta-8k)** model.

**Detected labels:**
- **Emotions**: 😊 radość (joy), 🤢 wstręt (disgust), 😠 gniew (anger), 🤔 przeczuwanie (anticipation)
- **Sentiment**: 👍 pozytywny (positive), 👎 negatywny (negative), 😐 neutralny (neutral)
- **Special**: 😏 sarkazm (sarcasm)

The model uses **multi-label classification** - text can have multiple emotions/sentiments simultaneously.
""")

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Tweet to Analyze",
                placeholder="e.g., Wspaniały dzień! Jestem bardzo szczęśliwy :)",
                lines=4,
            )
            with gr.Row():
                mode_input = gr.Radio(
                    choices=["Calibrated", "Default"],
                    value="Calibrated",
                    label="Prediction Mode",
                    info="Calibrated uses optimal thresholds per label (recommended)",
                )
                anonymize_input = gr.Checkbox(
                    value=True,
                    label="Anonymize @mentions",
                    info="Replace @username with @anonymized_account",
                )
            threshold_input = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=DEFAULT_THRESHOLD,
                step=0.05,
                label="Threshold (Default mode only)",
                info="Only used when Default mode is selected",
            )
            predict_btn = gr.Button("Analyze Emotions", variant="primary", size="lg")

        with gr.Column(scale=3):
            prediction_output = gr.Markdown(label="Predictions")
            with gr.Accordion("Detailed JSON Output", open=False):
                json_output = gr.Code(label="Full Prediction Details", language="json")

    # Connect the predict button
    predict_btn.click(
        fn=predict_emotions,
        inputs=[text_input, mode_input, threshold_input, anonymize_input],
        outputs=[prediction_output, json_output],
    )

    # Examples section
    gr.Markdown("### Example Input")
    gr.Examples(
        examples=examples,
        inputs=[text_input],
        outputs=[prediction_output, json_output],
        fn=predict_emotions,
        cache_examples=False,
    )
| gr.Markdown(""" | |
| --- | |
| ### Model Performance | |
| | Metric | Validation Score | | |
| |--------|------------------| | |
| | F1 Macro | 0.85 | | |
| | F1 Micro | 0.89 | | |
| | F1 Weighted | 0.89 | | |
| | Subset Accuracy | 0.89 | | |
| ### How to Use | |
| 1. **Enter Polish text**: Paste a tweet, social media post, or any Polish text | |
| 2. **Select mode**: | |
| - **Calibrated** (recommended): Uses temperature scaling and optimal thresholds per label | |
| - **Default**: Uses a single threshold for all labels | |
| 3. **Adjust settings**: Toggle mention anonymization, adjust threshold (Default mode) | |
| 4. **Click Analyze**: Get emotion and sentiment predictions with confidence scores | |
| ### Prediction Modes | |
| - **Calibrated Mode** (Recommended): Uses temperature scaling and label-specific optimal thresholds for better accuracy and calibration. This mode is recommended for most use cases. | |
| - **Default Mode**: Uses sigmoid activation with a single threshold across all labels. Useful for quick predictions or when you want uniform threshold control. | |
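
For reference, here is a minimal sketch of Default-mode scoring outside this UI. It assumes the model id above is accessible to you (a token may be needed if the repo is private); the input sentence is just the placeholder example from this app:

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_id = "yazoniak/twitter-emotion-pl-classifier"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)
model.eval()

# Default mode: sigmoid over the raw logits, one shared threshold for every label
inputs = tokenizer("Wspaniały dzień! Jestem bardzo szczęśliwy :)", return_tensors="pt", truncation=True)
with torch.no_grad():
    probs = torch.sigmoid(model(**inputs).logits).squeeze()

labels = [model.config.id2label[i] for i in range(model.config.num_labels)]
print({label: round(float(p), 3) for label, p in zip(labels, probs) if p > 0.5})
```

Calibrated mode additionally divides each logit by its per-label temperature before the sigmoid and compares the result against that label's threshold from `calibration_artifacts.json`.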

### Limitations

- Model is trained on Polish Twitter data and works best with informal social media text
- May not generalize well to formal Polish text (news, academic writing)
- Optimal for tweet-length texts (not very long documents)
- Multi-label nature means texts can have seemingly contradictory labels (e.g., sarkazm + pozytywny)

### Citation

If you use this model, please cite:

```bibtex
@misc{yazoniak2025twitteremotionpl,
  author = {yazoniak},
  title = {Polish Twitter Emotion Classifier},
  year = {2025},
  publisher = {Hugging Face},
  url = {https://huggingface.co/yazoniak/twitter-emotion-pl-classifier}
}
```

### 📄 License

GPL-3.0 License

---

### 📊 Data Collection Notice

This space automatically logs all predictions for model improvement and research purposes. The collected data includes:

- Input text and analysis settings
- Model predictions and confidence scores

All data is stored securely in a private HuggingFace dataset and used solely for improving the model's performance.
""")

# Launch the app
if __name__ == "__main__":
    demo.launch()