Spaces:

jvillar02
/

news-classifier-streamlit

Sleeping

App Files Files Community

Joaquin Villar committed 7 days ago

Commit

b33a33c

verified ·

1 Parent(s): 735af62

Create app.py

Browse files

Files changed (1) hide show

app.py +155 -0

app.py ADDED Viewed

	@@ -0,0 +1,155 @@

+import streamlit as st
+import torch
+import numpy as np
+import pandas as pd
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from peft import PeftModel, PeftConfig
+import os
+# --- 1. CONFIGURATION & METRICS ---
+# These match the final results from your notebook
+MODEL_METRICS = {
+    "Accuracy": "89.20%",
+    "F1_Score": "0.8931"
+}
+# Your Hugging Face Model Repository
+ADAPTER_REPO = "jvillar-sheff/ag-news-distilbert-lora"
+BASE_MODEL_ID = "distilbert-base-uncased"
+CLASS_NAMES = {0: "World", 1: "Sports", 2: "Business", 3: "Sci/Tech"}
+# --- 2. PAGE SETUP ---
+st.set_page_config(page_title="News Classifier", page_icon="📰", layout="centered")
+# --- 3. MODEL LOADING (Cached) ---
+# @st.cache_resource ensures the model loads only once, making the app fast
+@st.cache_resource
+def load_model():
+    try:
+        # Load Base Model
+        base_model = AutoModelForSequenceClassification.from_pretrained(
+            BASE_MODEL_ID,
+            num_labels=len(CLASS_NAMES),
+            id2label={k: v for k, v in enumerate(CLASS_NAMES.values())},
+            label2id={v: k for k, v in CLASS_NAMES.items()}
+        )
+        # Load Tokenizer (from your repo to ensure consistency)
+        tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO)
+        # Load LoRA Adapters
+        model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
+        # Force CPU (Standard for free Hugging Face Spaces)
+        device = torch.device("cpu")
+        model.to(device)
+        model.eval()
+        return model, tokenizer, device
+    except Exception as e:
+        st.error(f"Error loading model: {e}")
+        return None, None, None
+# Initialize model
+model, tokenizer, device = load_model()
+# --- 4. PREDICTION FUNCTION ---
+def predict(text):
+    # Preprocess text
+    inputs = tokenizer(
+        text,
+        return_tensors="pt",
+        truncation=True,
+        padding="max_length",
+        max_length=128
+    ).to(device)
+    # Inference
+    with torch.no_grad():
+        outputs = model(**inputs)
+    # Calculate probabilities
+    logits = outputs.logits
+    probs = torch.nn.functional.softmax(logits, dim=1).squeeze().cpu().numpy()
+    # Get top prediction
+    pred_idx = np.argmax(probs)
+    pred_label = CLASS_NAMES[pred_idx]
+    pred_conf = probs[pred_idx]
+    # Format all probabilities for the chart
+    class_probs = {CLASS_NAMES[i]: float(probs[i]) for i in range(len(CLASS_NAMES))}
+    return pred_label, pred_conf, class_probs
+# --- 5. USER INTERFACE ---
+# Header
+st.title("📰 NLP News Classifier")
+st.markdown("""
+This interface uses a **DistilBERT** model fine-tuned with **LoRA (Low-Rank Adaptation)**.
+It classifies news text into four categories: **World, Sports, Business, or Sci/Tech**.
+""")
+# Green Performance Banner
+st.success(f"✅ **Model Performance (Test Set):** Accuracy: {MODEL_METRICS['Accuracy']} | F1 Score: {MODEL_METRICS['F1_Score']}")
+# Input Area
+text_input = st.text_area(
+    "Enter a News Article or Snippet:",
+    height=150,
+    placeholder="e.g., The stock market rallied today as tech companies reported record profits..."
+)
+# Classify Button
+if st.button("Classify Article", type="primary"):
+    if not text_input.strip():
+        st.warning("Please enter some text first.")
+    else:
+        with st.spinner("Analyzing..."):
+            label, confidence, all_probs = predict(text_input)
+        # --- RESULTS SECTION ---
+        st.divider()
+        # Create two columns for layout
+        col1, col2 = st.columns([1, 1.5])
+        with col1:
+            st.subheader("Prediction")
+            # Display big label
+            st.markdown(f"<h1>{label}</h1>", unsafe_allow_html=True)
+            # Dynamic color for confidence badge
+            if confidence > 0.85:
+                badge_color = "#d4edda" # Green
+                text_color = "#155724"
+            elif confidence > 0.60:
+                badge_color = "#fff3cd" # Yellow/Orange
+                text_color = "#856404"
+            else:
+                badge_color = "#f8d7da" # Red
+                text_color = "#721c24"
+            st.markdown(
+                f"""<div style='background-color:{badge_color}; color:{text_color};
+                padding: 10px; border-radius: 5px; display: inline-block; font-weight: bold;'>
+                Confidence: {confidence:.2%}
+                </div>""",
+                unsafe_allow_html=True
+            )
+        with col2:
+            st.subheader("Probability Breakdown")
+            # Prepare data for chart
+            df_probs = pd.DataFrame(
+                list(all_probs.items()),
+                columns=['Category', 'Probability']
+            )
+            # Show bar chart
+            st.bar_chart(df_probs.set_index('Category'))
+# Footer
+st.markdown("---")
+st.caption("Built by Joaquin Villar Urrutia | Powered by Hugging Face & Streamlit")