Spaces:
Running
Running
commit
Browse files- .gitignore +6 -0
- app.py +92 -0
- data_loader.py +48 -0
- imdb.csv +48 -0
- model.py +39 -0
- requirements.txt +5 -0
- utils.py +15 -0
.gitignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
*.pkl
|
| 4 |
+
.env
|
| 5 |
+
kaggle.json
|
| 6 |
+
*.csv
|
app.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
"""
|
| 3 |
+
Gradio app for the workshop.
|
| 4 |
+
Two modes:
|
| 5 |
+
- Single Review -> instant sentiment
|
| 6 |
+
- Upload CSV (with 'review' column) -> run batch predictions and download CSV
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import gradio as gr
|
| 10 |
+
import pandas as pd
|
| 11 |
+
from model import analyze_text, analyze_batch
|
| 12 |
+
from data_loader import load_data
|
| 13 |
+
from utils import add_predictions_to_df
|
| 14 |
+
from io import StringIO, BytesIO
|
| 15 |
+
|
| 16 |
+
# Single prediction function
|
| 17 |
+
def predict_single(review: str):
    """Classify one review and return ``(label, score)`` for the UI.

    The prediction dict comes from ``analyze_text``; its ``score`` entry is
    coerced to a plain ``float`` before being handed back.
    """
    result = analyze_text(review)
    return result.get('label'), float(result.get('score'))
|
| 22 |
+
|
| 23 |
+
# Batch prediction function for uploaded CSV
|
| 24 |
+
def predict_file(file_obj):
    """
    Run batch sentiment prediction on an uploaded CSV.

    Args:
        file_obj: The upload handed over by the UI. A value exposing a
            ``name`` attribute is read from that path; any other value is
            passed straight to ``pandas.read_csv``.

    Returns:
        A ``(status_message, download_path)`` tuple. ``download_path`` is the
        path of a temporary ``predictions.csv`` holding the input rows plus
        the prediction columns, or ``None`` when the upload could not be
        processed.
    """
    # Local imports keep this fix self-contained within the function.
    import os
    import tempfile

    if file_obj is None:
        return "Error reading CSV: no file was uploaded.", None

    try:
        source = file_obj.name if hasattr(file_obj, "name") else file_obj
        df = pd.read_csv(source)
    except Exception as e:
        # UI boundary: report the problem in the status box instead of crashing.
        return "Error reading CSV: " + str(e), None

    # Find the first review-like column. Labels go through str() because
    # column labels are not guaranteed to be strings.
    text_col = next(
        (
            c for c in df.columns
            if 'review' in str(c).lower() or 'text' in str(c).lower()
        ),
        None,
    )
    if text_col is None:
        return "Uploaded CSV must contain a text column named like 'review' or 'text'.", None

    texts = df[text_col].fillna("").astype(str).tolist()
    preds = analyze_batch(texts, batch_size=32)
    out_df = add_predictions_to_df(df, preds)

    # A file-download output needs a real path on disk, not in-memory content,
    # so the result is written into a fresh temporary directory.
    out_dir = tempfile.mkdtemp(prefix="predictions_")
    out_path = os.path.join(out_dir, "predictions.csv")
    out_df.to_csv(out_path, index=False)
    return "Success: Predictions added", out_path
|
| 49 |
+
|
| 50 |
+
# Optional demo: load a few rows from local imdb.csv (if present)
|
| 51 |
+
def demo_sample():
    """
    Predict sentiment for the first five rows of the default dataset.

    Returns:
        A dict mapping ``"Review N"`` (1-based) to a
        ``(text, label, score)`` tuple, or ``{"error": message}`` if loading
        the data or running the model raises.
    """
    try:
        df = load_data()
        # Coerce to str so a non-text column cannot reach the model as numbers.
        texts = df['review'].head(5).astype(str).tolist()
        preds = analyze_batch(texts, batch_size=8)
        return {
            f"Review {i}": (text, pred['label'], pred['score'])
            for i, (text, pred) in enumerate(zip(texts, preds), start=1)
        }
    except Exception as e:
        # UI boundary: surface the failure in the JSON panel rather than crash.
        return {"error": str(e)}
|
| 61 |
+
|
| 62 |
+
with gr.Blocks() as demo:
    gr.Markdown("# Movie Review Sentiment — Workshop App")

    # --- Single review ---
    gr.Markdown("**Single prediction** — Type a review and get sentiment.")
    # NOTE(review): the original indentation was lost; the grouping of these
    # three widgets under the Row is assumed — confirm against the real file.
    with gr.Row():
        txt = gr.Textbox(lines=4, label="Enter movie review here")
        out_label = gr.Textbox(label="Predicted label")
        out_score = gr.Number(label="Confidence score")
    btn = gr.Button("Analyze")
    # predict_single already takes exactly one argument, so no wrapper is needed.
    btn.click(fn=predict_single, inputs=[txt], outputs=[out_label, out_score])

    # --- Batch upload ---
    gr.Markdown("----")
    gr.Markdown("**Batch prediction** — Upload a CSV with a `review` (or `text`) column.")
    csv_in = gr.File(label="Upload CSV")
    status = gr.Textbox(label="Status")
    download_button = gr.File(label="Download predictions (after running)")
    run_btn = gr.Button("Run batch predictions")

    def run_and_return(file):
        # Forward predict_file's (status message, download value) pair
        # unchanged to the status box and the download widget.
        return predict_file(file)

    run_btn.click(fn=run_and_return, inputs=[csv_in], outputs=[status, download_button])

    # --- Bundled demo data ---
    gr.Markdown("----")
    gr.Markdown("**Demo sample (if `imdb.csv` exists locally)**")
    sample_btn = gr.Button("Load demo sample & predict")
    demo_output = gr.JSON()
    sample_btn.click(fn=demo_sample, inputs=None, outputs=[demo_output])

if __name__ == "__main__":
    # Local workshop run: listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
data_loader.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# data_loader.py
|
| 2 |
+
"""
|
| 3 |
+
Simple dataset loader for the workshop.
|
| 4 |
+
Expected CSV columns: 'review' (text), optionally 'label' (gold label)
|
| 5 |
+
Place your Kaggle CSV in the project folder as imdb.csv (or change the path below).
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
from typing import Tuple
|
| 10 |
+
|
| 11 |
+
DEFAULT_CSV = "imdb.csv"


def load_data(path: str = DEFAULT_CSV) -> pd.DataFrame:
    """
    Load a review dataset from CSV and normalize it for the app.

    The text column is chosen in this order:
      1. a column named exactly ``review``;
      2. the first column whose name contains ``review`` or ``text``
         (case-insensitive);
      3. the first string-typed column.
    The chosen column is renamed to ``review`` and rows where it is missing
    are dropped.

    Args:
        path: CSV file to read. Defaults to ``DEFAULT_CSV``.

    Returns:
        A DataFrame with a ``review`` column and a fresh 0..n-1 index.

    Raises:
        ValueError: If no text-like column can be found.
    """
    df = pd.read_csv(path)
    # Normalize column names; str() guards against non-string labels.
    df.columns = [str(c).strip() for c in df.columns]

    if 'review' in df.columns:
        # An exact match wins, so a sibling such as 'review_id' is never
        # renamed on top of the real review column.
        text_col = 'review'
    else:
        named = [
            c for c in df.columns
            if 'review' in c.lower() or 'text' in c.lower()
        ]
        if named:
            text_col = named[0]
        else:
            # Fallback: first string column (object or pandas string dtype).
            string_cols = [
                c for c in df.columns
                if pd.api.types.is_string_dtype(df[c].dtype)
            ]
            if not string_cols:
                raise ValueError("No text-like column found in the CSV. Ensure a 'review' column exists.")
            text_col = string_cols[0]

    # Rename to the standard name.
    if text_col != 'review':
        df = df.rename(columns={text_col: 'review'})

    # Drop rows with missing reviews.
    return df.dropna(subset=['review']).reset_index(drop=True)
|
| 40 |
+
|
| 41 |
+
def sample_data(df: pd.DataFrame, n: int = 10, random_state=None) -> pd.DataFrame:
    """Return a small random sample for fast local testing / demo.

    Args:
        df: Source DataFrame.
        n: Desired number of rows. Clamped to the range ``0..len(df)``, so
            oversized or negative requests do not raise.
        random_state: Optional seed forwarded to ``DataFrame.sample`` to make
            the sample reproducible.

    Returns:
        The sampled rows with a fresh 0..k-1 index.
    """
    size = max(0, min(n, len(df)))
    return df.sample(n=size, random_state=random_state).reset_index(drop=True)
|
| 44 |
+
|
| 45 |
+
if __name__ == "__main__":
    # Manual smoke check: load the default CSV, report its size, preview rows.
    loaded = load_data()
    print("Loaded dataset with", len(loaded), "rows")
    print(loaded.head())
|
imdb.csv
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Name,Reviews,Date
|
| 2 |
+
Ken Tucker,The pleasure to be taken from 'Office' isn't merely that of laughter -- it's the pleasure of watching a piece of entertainment so perfectly made and so delicately acted.,"Jun 14, 2013"
|
| 3 |
+
,"As ever, it's all in the details.","Jun 18, 2013"
|
| 4 |
+
,"It takes a little while to get into it (episode two clinched it for me), but once you get used to the accents and dry humor, you're hooked. [23 Jan 2003]","Jul 25, 2013"
|
| 5 |
+
Monica Collins,"""The Office"" makes you cringe in delight and heave with giggles when you see the absurdity of it all. [23 Oct 2003]","Jul 25, 2013"
|
| 6 |
+
Matthew Gilbert,"It is depressing, brilliant, hysterical, excruciating, full of irony, and nothing you'd ever expect to find on American network TV. Rather than sweetening the workplace with fantasies of a home away from home, ""The Office"" heightens the reality and disconnection of corporate life until it is absurdly funny. The show doesn't touch your heart so much as tickle your spleen. [9 Oct 2003]","Jul 25, 2013"
|
| 7 |
+
Steve Johnson,"""The Office"" is everything television comedy can and ought to be but almost never is. ... The result is subtle, searing and laugh-bitterly-out-loud funny, like a series of the darkest Dilbert strips strung together and given human dimension and narrative shape. [30 Jan 2003]","Jul 25, 2013"
|
| 8 |
+
Phil Rosenthal,"It takes a while to get into the rhythms of Gervais' writing and seemingly improvised acting, but once you've become a fan, it's the kind of show you can watch over and over. [10 Oct 2003]","Jul 25, 2013"
|
| 9 |
+
Ann Donahue,"Never have lives of quiet desperation been so laugh-out-loud funny as in ""The Office."" [10 Oct 2003]","Jul 25, 2013"
|
| 10 |
+
Steve Johnson,"The laughs are still there ... But the show begins to take on ominous overtones, a kind of small-scale tragedy in the making as it plots the apparent path of a little dictator's downfall. [10 Oct 2003]","Jul 25, 2013"
|
| 11 |
+
Joanne Ostrow,"David Brent, brilliantly conceived and played by Gervais, remains among the most wonderfully annoying characters in modern TV comedy. [12 Oct 2003]","Jul 25, 2013"
|
| 12 |
+
Tim Goodman,The funniest thing you're likely to see all year.,"Jul 25, 2013"
|
| 13 |
+
Mike Duffy,"Fascinating and outrageously witty ... the best new sitcom to hit American television since Larry David's similarly inspired ""Curb Your Enthusiasm."" ... An instant classic. [23 Jan 2003]","Jul 25, 2013"
|
| 14 |
+
Heather Havrilesky,"The first time you watch the show, you really don’t believe what you’re seeing. Each moment feels so real, it’s hard to tell if the actors are improvising brilliantly or just delivering their lines with incredible conviction. Like the best moments of 'This Is Spinal Tap' or 'Waiting for Guffman,' 'The Office' offers up breathtaking slices of deadpan humor and amazing comic timing.","Jul 25, 2013"
|
| 15 |
+
Virginia Heffernan,"As viewers, because of the incalculable talents of the actor Gervais, who also helped create the show, we must choose to humor David or to loathe him—and that choice is exciting, somehow, and challenging.","Jul 25, 2013"
|
| 16 |
+
Joy Press,"Wincingly funny ... It's the details that make the show so funny, not to mention the awesome veracity of the acting.","Jul 25, 2013"
|
| 17 |
+
Robert Lloyd,"As did the six previous episodes, the new installments generate a tension so awful, from circumstances so awfully lifelike, that you have to watch at times from behind laced fingers, with teeth clenched and the remote control close at hand.","Jul 25, 2013"
|
| 18 |
+
Alan Sepinwall,"A scream, in the biting Britcom tradition of ""Fawlty Towers"" and the best depiction of middle management hell since Mike Judge's cult classic ""Office Space."" [23 Jan 2003]","Jul 25, 2013"
|
| 19 |
+
Alessandra Stanley,"Television's funniest show. ... On a less carefully written show, the [mockumentary] conceit would almost certainly pall after a few episodes. 'The Office' is instead addictive, less because viewers grow to love David and his batty employees than because the show refuses to let those characters grow too lovable.","Jul 25, 2013"
|
| 20 |
+
Tim Goodman,"For those unbowed by the lack of formula, this second season of ""The Office"" has rewards even greater than the first. The series is both funnier and darker -- much darker -- than last season.","Jul 25, 2013"
|
| 21 |
+
Christopher Sieving,"In 10 years of reviewing film and television for various publications, no comedy has given me as much pleasure as The Office.","Jul 25, 2013"
|
| 22 |
+
Alan Sepinwall,"The six-episode first season of ""The Office"" was so dark, so wicked, so brilliant that it was hard to imagine Gervais and Merchant topping themselves. But they have. By slowly chipping away at David's power base, they've made him even more desperate, petulant and bullying. (The less funny David gets, the funnier the show is.) [10 Oct 2003]","Jul 25, 2013"
|
| 23 |
+
Robert Bianco,"Gervais' show is so unusual, and his performance as David Brent is so painfully specific -- and sometimes just so flat-out painful -- it's hard to imagine how anyone else can make it work. [23 Jan 2003]","Jul 25, 2013"
|
| 24 |
+
Matthew Gilbert,It's hard to imagine any other comedy series putting such a fitting cap on its run. [21 Oct 2004],"Jul 25, 2013"
|
| 25 |
+
David Bianculli,"A fully satisfying and fitting TV finale, and a show that really should be seen. It's acted as well as it's written and directed, which means it's just about perfect. [21 Oct 2004]","Jul 25, 2013"
|
| 26 |
+
Bill Goodykoontz,"Like the series that preceded it, The Office Special is brilliant. [21 Oct 2004]","Jul 25, 2013"
|
| 27 |
+
Sarah Rodman,"As he did in the series, Gervais imbues Brent with a kind of idiotic cleverness that is both silly and devastating. [21 Oct 2004]","Jul 25, 2013"
|
| 28 |
+
Joanne Ostrow,Supremely satisfying. [21 Oct 2004],"Jul 25, 2013"
|
| 29 |
+
Robert Lloyd,"It integrates the boredom, self-delusion, dashed hopes and struggle for power into something bigger, and potentially better, and functions not only as a continuation of the story but a convincing conclusion.","Jul 25, 2013"
|
| 30 |
+
Mike Duffy,A masterful series finale. [21 Oct 2004],"Jul 25, 2013"
|
| 31 |
+
Diane Werts,"Whether it's Brent's starry-eyed foppishness, Dawn's artistic daydreams or Gareth's organizational stiffness, these are characters we don't see on American TV. They're not accomplished, clever or distinctive. But they're so well-observed, and so subtly personified, that it's as if we're finding amusement in people we know. [21 Oct 2004]","Jul 25, 2013"
|
| 32 |
+
Mike McDaniel,Easily one of the funniest shows of the year. [21 Oct 2004],"Jul 25, 2013"
|
| 33 |
+
Matt Zoller Seitz,A masterful two-hour finale to an already exceptional program. [21 Oct 2004],"Jul 25, 2013"
|
| 34 |
+
Alessandra Stanley,"As wickedly, painfully funny as the first two seasons and, in tiny, fleeting doses, as delicately tender.","Jul 25, 2013"
|
| 35 |
+
Charlie McCollum,"The special stays true to the tone and searing wit of the original series and makes for a spectacularly funny coda to ""The Office."" [18 Oct 2004]","Jul 25, 2013"
|
| 36 |
+
Tim Goodman,Quite possibly the finest closing chapter ever for a TV series.,"Jul 25, 2013"
|
| 37 |
+
Devin Gordon,"Thanks to a finale that is, by turns, hysterical, excruciating and even poignant, this series--about a blowhard branch manager (Ricky Gervais) who gets sacked from his job at a struggling paper company--gets to quit while it's way, way ahead. [18 Oct 2004]","Jul 25, 2013"
|
| 38 |
+
Rob Owen,Brilliant. [21 Oct 2004],"Jul 25, 2013"
|
| 39 |
+
James Poniewozik,"A satisfying, touching and excruciatingly funny severance package.","Jul 1, 2013"
|
| 40 |
+
,"For the most part, the special continues with the show's distinctive brand of brutal, punchline-free, squirm-inducing humor. Ultimately, though, it gives us what the first and second seasons did not, a happy ending. This optimistic conclusion could be seen as a loss of nerve. I prefer to see it as chance for release.","Jul 3, 2013"
|
| 41 |
+
Rob Owen,"""The Office"" is hilarious, but it is an acquired taste as it serves up comedy of the uncomfortable. [10 Oct 2003]","Jul 25, 2013"
|
| 42 |
+
,"It's a wonderful, subversive concept, and by failing to romanticize the players, ""Office"" remains true to its ghastly, funny self. [23 Jan 2003]","Jul 25, 2013"
|
| 43 |
+
Tom Shales,"Mike Judge, creator of ""Beavis and Butt-head,"" made a darn good try at a seriously funny workplace comedy with his 1999 film ""Office Space,"" but Gervais and Merchant have even greater success. ""The Office"" is hilarious in a very hip and flippant way. [30 Jan 2003]","Jul 25, 2013"
|
| 44 |
+
Maureen Ryan,"What's most satisfying about ""The Office"" is that, despite the sharpest humor this side of ""The Larry Sanders Show,"" it has an ultimately sympathetic take on the cubicle-dwellers of the world, and that outlook is derived from a million tiny observations about personal decency (and lack thereof). [21 Oct 2004]","Jul 25, 2013"
|
| 45 |
+
David Bianculli,"It's dependent almost entirely on characters rather than plot - but, with such recognizable characters, ""The Office"" works beautifully. [10 Oct 2003]","Jul 25, 2013"
|
| 46 |
+
Robert P. Laurence,"Dark, sly, ironic, subtle, brilliant. ... A taste for British humor comes in handy in watching ""The Office,"" though. If you're bothered by deliberate (but tongue-in-cheek) bad taste, raging political incorrectness, sexual innuendo or comedy involving large sexual toys, or if you just don't get satire, ""The Office"" may not be right for you. [24 Oct 2003]","Jul 25, 2013"
|
| 47 |
+
Robert Bianco,"It takes a real artist's eye to concentrate reality so realistically, and a true wit to pull it off in a sitcom that makes you gasp as frequently as it makes you laugh. [10 Oct 2003]","Jul 25, 2013"
|
| 48 |
+
Marvin Kitman,"Larry David is obnoxious in ""Curb Your Enthusiasm"" but very funny. Gervais' David is just obnoxious. ... It's the sort of comedy that only certain people can get, like the way dogs can hear sounds human can't. I'm ashamed to say, I couldn't take it more than one dinner hour. [19 Oct 2003]","Jul 25, 2013"
|
model.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# model.py
|
| 2 |
+
"""
|
| 3 |
+
Model wrapper using Hugging Face pipelines.
|
| 4 |
+
We use a ready-made sentiment-analysis pipeline so we don't train here.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from transformers import pipeline
|
| 8 |
+
from typing import List, Dict
|
| 9 |
+
import math
|
| 10 |
+
from tqdm import tqdm
|
| 11 |
+
|
| 12 |
+
# Small, ready-made SST-2 sentiment checkpoint; no training happens here.
DEFAULT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"

# Built once at import time and shared by analyze_text / analyze_batch.
sentiment_pipeline = pipeline(task="sentiment-analysis", model=DEFAULT_MODEL)
|
| 17 |
+
|
| 18 |
+
def analyze_text(text: str) -> Dict:
    """
    Analyze a single text string and return label + score.

    Args:
        text: The review text. Non-string or blank input is not sent to the
            model.

    Returns:
        ``{"label": ..., "score": float}`` as produced by the pipeline, or
        ``{"label": "NEUTRAL", "score": 0.0}`` for non-string / blank input.
    """
    if not isinstance(text, str) or not text.strip():
        return {"label": "NEUTRAL", "score": 0.0}
    # The 1000-character cut keeps inference fast but does not bound the
    # token count, so tokenizer truncation is requested as well.
    out = sentiment_pipeline(text[:1000], truncation=True)
    # The pipeline returns a list with one dict per input.
    return out[0]
|
| 28 |
+
|
| 29 |
+
def analyze_batch(texts: List[str], batch_size: int = 16) -> List[Dict]:
    """
    Analyze a list of texts in batches (to avoid memory spikes).

    Blank or non-string entries are not sent to the model; they receive
    ``{"label": "NEUTRAL", "score": 0.0}``, matching ``analyze_text``.

    Args:
        texts: Texts to classify.
        batch_size: Number of texts handed to the pipeline per call.

    Returns:
        One prediction dict per input, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    texts = list(texts)
    results = [None] * len(texts)

    # Split real texts from blanks, remembering each text's original position.
    pending = []
    for pos, text in enumerate(texts):
        if isinstance(text, str) and text.strip():
            pending.append((pos, text))
        else:
            results[pos] = {"label": "NEUTRAL", "score": 0.0}

    if not pending:
        return results

    for start in tqdm(range(0, len(pending), batch_size), desc="Running model"):
        chunk = pending[start:start + batch_size]
        # truncation=True keeps long reviews within the model's input limit.
        outputs = sentiment_pipeline([t for _, t in chunk], truncation=True)
        for (pos, _), out in zip(chunk, outputs):
            results[pos] = out
    return results
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers>=4.30.0
|
| 2 |
+
torch
|
| 3 |
+
pandas
|
| 4 |
+
gradio
|
| 5 |
+
tqdm
|
utils.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# utils.py
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from typing import List, Dict
|
| 4 |
+
|
| 5 |
+
def add_predictions_to_df(df: pd.DataFrame, preds: List[Dict]) -> pd.DataFrame:
    """
    Attach prediction columns to a copy of ``df``.

    Args:
        df: Input rows; left unmodified.
        preds: One dict per row with optional ``label`` and ``score`` keys.
            A missing label becomes ``''``; a missing or ``None`` score
            becomes ``0.0``.

    Returns:
        A new DataFrame with ``prediction`` and ``prediction_score`` columns
        added.

    Raises:
        ValueError: If the number of predictions differs from the number of
            rows.
    """
    if len(preds) != len(df):
        raise ValueError(
            f"Got {len(preds)} predictions for {len(df)} rows; counts must match."
        )
    out = df.copy()
    out['prediction'] = [p.get('label', '') for p in preds]
    # `or 0.0` also covers an explicit None score, which float() would reject.
    out['prediction_score'] = [float(p.get('score') or 0.0) for p in preds]
    return out
|