Spaces:
Runtime error
Runtime error
| # imports | |
| import gradio as gr | |
| import pandas as pd | |
| import tempfile | |
| import itertools | |
| import torch | |
| import numpy as np | |
| from numpy import dot | |
| from numpy.linalg import norm, multi_dot | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer | |
| # compute dot product of inputs | |
| # summary function - test for single gradio function interface | |
# Hugging Face checkpoint used for emotion classification.
_EMOTION_MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"

# Output column names, mapped positionally onto class indices 0..6 of the
# model output. NOTE(review): this order is assumed to match
# model.config.id2label for the checkpoint above - confirm if the model changes.
_EMOTION_COLUMNS = (
    "anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise",
)

# Lazily filled cache so the tokenizer/model/trainer are loaded only once
# per process instead of on every request.
_emotion_resources = {}


class _SimpleDataset:
    """Minimal indexable dataset wrapping a tokenizer output for Trainer.predict."""

    def __init__(self, tokenized_texts):
        self.tokenized_texts = tokenized_texts

    def __len__(self):
        return len(self.tokenized_texts["input_ids"])

    def __getitem__(self, idx):
        return {k: v[idx] for k, v in self.tokenized_texts.items()}


def _get_emotion_resources():
    """Return the cached (tokenizer, model, trainer), loading them on first use."""
    if not _emotion_resources:
        tokenizer = AutoTokenizer.from_pretrained(_EMOTION_MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(
            _EMOTION_MODEL_NAME
        )
        _emotion_resources.update(
            tokenizer=tokenizer,
            model=model,
            trainer=Trainer(model=model),
        )
    return (
        _emotion_resources["tokenizer"],
        _emotion_resources["model"],
        _emotion_resources["trainer"],
    )


def _softmax(logits):
    """Return the row-wise softmax of *logits* as a float64 array."""
    logits = np.asarray(logits, dtype=np.float64)
    # Subtracting the row maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing on large logits.
    exps = np.exp(logits - logits.max(axis=-1, keepdims=True))
    return exps / exps.sum(axis=-1, keepdims=True)


def gr_cosine_similarity(sentence1, sentence2):
    """Compare the predicted emotion distributions of two sentences.

    Both sentences are classified with the emotion model; the class
    probabilities of each sentence are reported in a table, and the cosine
    similarity between the two probability vectors is computed.

    Args:
        sentence1: First input text.
        sentence2: Second input text.

    Returns:
        A tuple ``(df, cosine_similarity)``: ``df`` is a DataFrame with one
        row per sentence and the columns ``text``, ``max_label`` and one
        column per emotion (probabilities rounded to 3 decimals);
        ``cosine_similarity`` is the cosine similarity of the two
        probability vectors, rounded to 3 decimals.
    """
    tokenizer, model, trainer = _get_emotion_resources()

    # Treat a missing input as an empty string so tokenization does not fail.
    lines_s = ["" if s is None else s for s in (sentence1, sentence2)]

    # Tokenize texts and create the prediction dataset.
    tokenized_texts = tokenizer(lines_s, truncation=True, padding=True)
    predictions = trainer.predict(_SimpleDataset(tokenized_texts))

    # Map each row's highest-scoring class index to its label name.
    logits = predictions.predictions
    labels = pd.Series(logits.argmax(-1)).map(model.config.id2label)

    # Class probabilities, one row per sentence.
    probabilities = _softmax(logits)

    n_emotions = len(_EMOTION_COLUMNS)
    rows = [
        [text, label, *(round(p, 3) for p in row[:n_emotions])]
        for text, label, row in zip(lines_s, labels, probabilities.tolist())
    ]
    df = pd.DataFrame(rows, columns=["text", "max_label", *_EMOTION_COLUMNS])

    # Cosine similarity = dot product divided by the product of the norms.
    v1, v2 = probabilities[0], probabilities[1]
    cosine_similarity = round(dot(v1, v2) / (norm(v1) * norm(v2)), 3)

    return df, cosine_similarity
def _make_textbox(placeholder, label):
    """Build a single-line, initially empty text input.

    Uses the top-level ``gr.Textbox`` component when the installed Gradio
    provides it, and falls back to the legacy ``gr.inputs.Textbox`` otherwise.
    The legacy ``gr.inputs`` namespace was deprecated in Gradio 3 and removed
    in Gradio 4, so relying on it alone fails on current releases.
    """
    if hasattr(gr, "Textbox"):
        return gr.Textbox(lines=1, placeholder=placeholder, value="", label=label)
    return gr.inputs.Textbox(
        lines=1, placeholder=placeholder, default="", label=label
    )


# Build the two-text-in, table-plus-score-out interface and start the app.
gr.Interface(
    gr_cosine_similarity,
    [
        _make_textbox("This tool is awesome!", "Text 1"),
        _make_textbox("I am so happy right now.", "Text 2"),
    ],
    ["dataframe", "text"],
    title="Emotion Similarity",
    description="Input two sentences and the model returns their emotional similarity (between 0 and 1), using this model: https://huggingface.co/j-hartmann/emotion-english-distilroberta-base.",
).launch(debug=True)