Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py
Browse files
app.py
CHANGED
|
@@ -4,11 +4,23 @@ Gradio app for Polish Twitter Emotion Classifier.
|
|
| 4 |
This application provides an interactive interface for predicting emotions
|
| 5 |
and sentiment in Polish text using a fine-tuned RoBERTa model.
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
"""
|
| 10 |
|
| 11 |
import gradio as gr
|
|
|
|
| 12 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 13 |
import torch
|
| 14 |
import numpy as np
|
|
@@ -25,6 +37,10 @@ DEFAULT_THRESHOLD = 0.5
|
|
| 25 |
# Authentication token for private models
|
| 26 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# Emotion emojis for visual display
|
| 29 |
LABEL_EMOJIS = {
|
| 30 |
"radość": "😊",
|
|
@@ -106,6 +122,22 @@ model, tokenizer, labels, calibration_artifacts = load_model()
|
|
| 106 |
print(f"✓ Model loaded successfully with {len(labels)} labels")
|
| 107 |
print(f" Labels: {', '.join(labels)}")
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
def predict_emotions(
|
| 111 |
text: str,
|
|
@@ -115,6 +147,8 @@ def predict_emotions(
|
|
| 115 |
) -> tuple[str, str]:
|
| 116 |
"""
|
| 117 |
Predict emotions and sentiment for Polish text.
|
|
|
|
|
|
|
| 118 |
|
| 119 |
Args:
|
| 120 |
text: Input Polish text
|
|
@@ -278,6 +312,17 @@ def predict_emotions(
|
|
| 278 |
|
| 279 |
all_scores_json = json.dumps(json_output, indent=2, ensure_ascii=False)
|
| 280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
return result_text, all_scores_json
|
| 282 |
|
| 283 |
|
|
@@ -343,7 +388,7 @@ with gr.Blocks(
|
|
| 343 |
with gr.Accordion("Detailed JSON Output", open=False):
|
| 344 |
json_output = gr.Code(label="Full Prediction Details", language="json")
|
| 345 |
|
| 346 |
-
# Connect the button
|
| 347 |
predict_btn.click(
|
| 348 |
fn=predict_emotions,
|
| 349 |
inputs=[text_input, mode_input, threshold_input, anonymize_input],
|
|
@@ -385,6 +430,7 @@ with gr.Blocks(
|
|
| 385 |
- **Calibrated Mode** (Recommended): Uses temperature scaling and label-specific optimal thresholds for better accuracy and calibration. This mode is recommended for most use cases.
|
| 386 |
- **Default Mode**: Uses sigmoid activation with a single threshold across all labels. Useful for quick predictions or when you want uniform threshold control.
|
| 387 |
|
|
|
|
| 388 |
### Limitations
|
| 389 |
|
| 390 |
- Model is trained on Polish Twitter data and works best with informal social media text
|
|
@@ -408,6 +454,16 @@ with gr.Blocks(
|
|
| 408 |
### 📄 License
|
| 409 |
|
| 410 |
GPL-3.0 License
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
""")
|
| 412 |
|
| 413 |
|
|
|
|
| 4 |
This application provides an interactive interface for predicting emotions
|
| 5 |
and sentiment in Polish text using a fine-tuned RoBERTa model.
|
| 6 |
|
| 7 |
+
Environment Variables:
|
| 8 |
+
HF_TOKEN: HuggingFace authentication token (required for private models and auto-logging)
|
| 9 |
+
export HF_TOKEN=your_huggingface_token
|
| 10 |
+
|
| 11 |
+
HF_DATASET_REPO: HuggingFace dataset name for storing predictions (optional)
|
| 12 |
+
export HF_DATASET_REPO=your-username/predictions-dataset
|
| 13 |
+
Default: "twitter-emotion-pl-feedback"
|
| 14 |
+
|
| 15 |
+
Features:
|
| 16 |
+
- Multi-label emotion and sentiment classification
|
| 17 |
+
- Calibrated predictions with temperature scaling
|
| 18 |
+
- Automatic prediction logging via HuggingFaceDatasetSaver
|
| 19 |
+
- Persistent data storage across space restarts
|
| 20 |
"""
|
| 21 |
|
| 22 |
import gradio as gr
|
| 23 |
+
from gradio.flagging import HuggingFaceDatasetSaver
|
| 24 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 25 |
import torch
|
| 26 |
import numpy as np
|
|
|
|
| 37 |
# Authentication token for private models
|
| 38 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
| 39 |
|
| 40 |
+
# Logging configuration - dataset for storing automatically logged predictions
|
| 41 |
+
# Set this to your desired dataset name, e.g. "your-username/model-feedback"
|
| 42 |
+
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "twitter-emotion-pl-feedback")
|
| 43 |
+
|
| 44 |
# Emotion emojis for visual display
|
| 45 |
LABEL_EMOJIS = {
|
| 46 |
"radość": "😊",
|
|
|
|
| 122 |
print(f"✓ Model loaded successfully with {len(labels)} labels")
|
| 123 |
print(f" Labels: {', '.join(labels)}")
|
| 124 |
|
| 125 |
+
# Initialize flagging callback for automatic prediction logging
|
| 126 |
+
flagging_callback = None
|
| 127 |
+
if HF_TOKEN:
|
| 128 |
+
try:
|
| 129 |
+
flagging_callback = HuggingFaceDatasetSaver(
|
| 130 |
+
hf_token=HF_TOKEN,
|
| 131 |
+
dataset_name=HF_DATASET_REPO,
|
| 132 |
+
private=True,
|
| 133 |
+
)
|
| 134 |
+
print(f"✓ Auto-logging enabled - all predictions will be saved to: {HF_DATASET_REPO}")
|
| 135 |
+
except Exception as e:
|
| 136 |
+
print(f"⚠ Could not initialize auto-logging: {e}")
|
| 137 |
+
print(" Predictions will not be logged")
|
| 138 |
+
else:
|
| 139 |
+
print("⚠ HF_TOKEN not set - auto-logging disabled")
|
| 140 |
+
|
| 141 |
|
| 142 |
def predict_emotions(
|
| 143 |
text: str,
|
|
|
|
| 147 |
) -> tuple[str, str]:
|
| 148 |
"""
|
| 149 |
Predict emotions and sentiment for Polish text.
|
| 150 |
+
|
| 151 |
+
Automatically logs every prediction to a HuggingFace dataset if flagging is enabled.
|
| 152 |
|
| 153 |
Args:
|
| 154 |
text: Input Polish text
|
|
|
|
| 312 |
|
| 313 |
all_scores_json = json.dumps(json_output, indent=2, ensure_ascii=False)
|
| 314 |
|
| 315 |
+
# Automatically log all predictions if flagging is enabled
|
| 316 |
+
if flagging_callback:
|
| 317 |
+
try:
|
| 318 |
+
flagging_callback.flag(
|
| 319 |
+
flag_data=[text, mode, threshold, anonymize, result_text, all_scores_json],
|
| 320 |
+
flag_option="auto_logged",
|
| 321 |
+
username=None,
|
| 322 |
+
)
|
| 323 |
+
except Exception as e:
|
| 324 |
+
print(f"⚠ Error logging prediction: {e}")
|
| 325 |
+
|
| 326 |
return result_text, all_scores_json
|
| 327 |
|
| 328 |
|
|
|
|
| 388 |
with gr.Accordion("Detailed JSON Output", open=False):
|
| 389 |
json_output = gr.Code(label="Full Prediction Details", language="json")
|
| 390 |
|
| 391 |
+
# Connect the predict button
|
| 392 |
predict_btn.click(
|
| 393 |
fn=predict_emotions,
|
| 394 |
inputs=[text_input, mode_input, threshold_input, anonymize_input],
|
|
|
|
| 430 |
- **Calibrated Mode** (Recommended): Uses temperature scaling and label-specific optimal thresholds for better accuracy and calibration. This mode is recommended for most use cases.
|
| 431 |
- **Default Mode**: Uses sigmoid activation with a single threshold across all labels. Useful for quick predictions or when you want uniform threshold control.
|
| 432 |
|
| 433 |
+
|
| 434 |
### Limitations
|
| 435 |
|
| 436 |
- Model is trained on Polish Twitter data and works best with informal social media text
|
|
|
|
| 454 |
### 📄 License
|
| 455 |
|
| 456 |
GPL-3.0 License
|
| 457 |
+
|
| 458 |
+
---
|
| 459 |
+
|
| 460 |
+
### 📊 Data Collection Notice
|
| 461 |
+
|
| 462 |
+
This space automatically logs all predictions for model improvement and research purposes. The collected data includes:
|
| 463 |
+
- Input text and analysis settings
|
| 464 |
+
- Model predictions and confidence scores
|
| 465 |
+
|
| 466 |
+
All data is stored securely in a private HuggingFace dataset and used solely for model improvement and research purposes.
|
| 467 |
""")
|
| 468 |
|
| 469 |
|