Spaces:

yazoniak
/

twitteremo-pl-classifier

Running on Zero

App Files Community

yazoniak committed on Nov 6

Commit

fac038c

verified ·

1 Parent(s): 41c915d

Upload app.py

Browse files

Files changed (1) hide show

app.py +59 -3

app.py CHANGED Viewed

@@ -4,11 +4,23 @@ Gradio app for Polish Twitter Emotion Classifier.
 This application provides an interactive interface for predicting emotions
 and sentiment in Polish text using a fine-tuned RoBERTa model.
-For private models, set the HF_TOKEN environment variable:
-    export HF_TOKEN=your_huggingface_token
 """
 import gradio as gr
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import torch
 import numpy as np
@@ -25,6 +37,10 @@ DEFAULT_THRESHOLD = 0.5
 # Authentication token for private models
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 # Emotion emojis for visual display
 LABEL_EMOJIS = {
     "radość": "😊",
@@ -106,6 +122,22 @@ model, tokenizer, labels, calibration_artifacts = load_model()
 print(f"✓ Model loaded successfully with {len(labels)} labels")
 print(f"  Labels: {', '.join(labels)}")
 def predict_emotions(
     text: str,
@@ -115,6 +147,8 @@ def predict_emotions(
 ) -> tuple[str, str]:
     """
     Predict emotions and sentiment for Polish text.
     Args:
         text: Input Polish text
@@ -278,6 +312,17 @@ def predict_emotions(
     all_scores_json = json.dumps(json_output, indent=2, ensure_ascii=False)
     return result_text, all_scores_json
@@ -343,7 +388,7 @@ with gr.Blocks(
     with gr.Accordion("Detailed JSON Output", open=False):
         json_output = gr.Code(label="Full Prediction Details", language="json")
-    # Connect the button
     predict_btn.click(
         fn=predict_emotions,
         inputs=[text_input, mode_input, threshold_input, anonymize_input],
@@ -385,6 +430,7 @@ with gr.Blocks(
     - **Calibrated Mode** (Recommended): Uses temperature scaling and label-specific optimal thresholds for better accuracy and calibration. This mode is recommended for most use cases.
     - **Default Mode**: Uses sigmoid activation with a single threshold across all labels. Useful for quick predictions or when you want uniform threshold control.
     ### Limitations
     - Model is trained on Polish Twitter data and works best with informal social media text
@@ -408,6 +454,16 @@ with gr.Blocks(
     ### 📄 License
     GPL-3.0 License
     """)

 This application provides an interactive interface for predicting emotions
 and sentiment in Polish text using a fine-tuned RoBERTa model.
+Environment Variables:
+    HF_TOKEN: HuggingFace authentication token (required for private models and auto-logging)
+        export HF_TOKEN=your_huggingface_token
+    HF_DATASET_REPO: HuggingFace dataset name for storing predictions (optional)
+        export HF_DATASET_REPO=your-username/predictions-dataset
+        Default: "twitter-emotion-pl-feedback"
+Features:
+    - Multi-label emotion and sentiment classification
+    - Calibrated predictions with temperature scaling
+    - Automatic prediction logging via HuggingFaceDatasetSaver
+    - Persistent data storage across space restarts
 """
 import gradio as gr
+from gradio.flagging import HuggingFaceDatasetSaver
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import torch
 import numpy as np
 # Authentication token for private models
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
+# Flagging configuration - dataset that stores the automatically logged predictions
+# Set this to your desired dataset name, e.g. "your-username/model-feedback"
+HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "twitter-emotion-pl-feedback")
 # Emotion emojis for visual display
 LABEL_EMOJIS = {
     "radość": "😊",
 print(f"✓ Model loaded successfully with {len(labels)} labels")
 print(f"  Labels: {', '.join(labels)}")
+# Initialize flagging callback for automatic prediction logging
+flagging_callback = None
+if HF_TOKEN:
+    try:
+        flagging_callback = HuggingFaceDatasetSaver(
+            hf_token=HF_TOKEN,
+            dataset_name=HF_DATASET_REPO,
+            private=True,
+        )
+        print(f"✓ Auto-logging enabled - all predictions will be saved to: {HF_DATASET_REPO}")
+    except Exception as e:
+        print(f"⚠ Could not initialize auto-logging: {e}")
+        print("  Predictions will not be logged")
+else:
+    print("⚠ HF_TOKEN not set - auto-logging disabled")
 def predict_emotions(
     text: str,
 ) -> tuple[str, str]:
     """
     Predict emotions and sentiment for Polish text.
+    Automatically logs every prediction to the HuggingFace dataset when the flagging callback is initialized.
     Args:
         text: Input Polish text
     all_scores_json = json.dumps(json_output, indent=2, ensure_ascii=False)
+    # Automatically log all predictions if flagging is enabled
+    if flagging_callback:
+        try:
+            flagging_callback.flag(
+                flag_data=[text, mode, threshold, anonymize, result_text, all_scores_json],
+                flag_option="auto_logged",
+                username=None,
+            )
+        except Exception as e:
+            print(f"⚠ Error logging prediction: {e}")
     return result_text, all_scores_json
     with gr.Accordion("Detailed JSON Output", open=False):
         json_output = gr.Code(label="Full Prediction Details", language="json")
+    # Connect the predict button
     predict_btn.click(
         fn=predict_emotions,
         inputs=[text_input, mode_input, threshold_input, anonymize_input],
     - **Calibrated Mode** (Recommended): Uses temperature scaling and label-specific optimal thresholds for better accuracy and calibration. This mode is recommended for most use cases.
     - **Default Mode**: Uses sigmoid activation with a single threshold across all labels. Useful for quick predictions or when you want uniform threshold control.
     ### Limitations
     - Model is trained on Polish Twitter data and works best with informal social media text
     ### 📄 License
     GPL-3.0 License
+    ---
+    ### 📊 Data Collection Notice
+    This space automatically logs all predictions for model improvement and research purposes. The collected data includes:
+    - Input text and analysis settings
+    - Model predictions and confidence scores
+    All data is stored securely in a private HuggingFace dataset and used solely for improving the model's performance.
     """)