# -*- coding: utf-8 -*-
# file: app.py
# time: 18:37 23/09/2023
# author: Amir Khan
# github: https://github.com/Amir22010
import os
import numpy as np
import ast
import gradio as gr
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
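
# Load the three fine-tuned allenai/tk-instruct seq2seq checkpoints from the Hugging Face Hub.
# Each load is wrapped in try/except so the app still starts (and logs the error) if a download fails.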
try:
    tokenizer_english = AutoTokenizer.from_pretrained("amir22010/PyABSA_Hospital_English_allenai_tk-instruct-base-def-pos_FinedTuned_Model", cache_dir=os.getcwd())
    double_english_generator = AutoModelForSeq2SeqLM.from_pretrained("amir22010/PyABSA_Hospital_English_allenai_tk-instruct-base-def-pos_FinedTuned_Model", cache_dir=os.getcwd())
except Exception as e:
    print(e)
    print("english model load error")
try:
    tokenizer_multilingual = AutoTokenizer.from_pretrained("amir22010/PyABSA_Hospital_Multilingual_allenai_tk-instruct-base-def-pos_FinedTuned_Model", cache_dir=os.getcwd())
    double_multilingual_generator = AutoModelForSeq2SeqLM.from_pretrained("amir22010/PyABSA_Hospital_Multilingual_allenai_tk-instruct-base-def-pos_FinedTuned_Model", cache_dir=os.getcwd())
except Exception as e:
    print(e)
    print("multilingual model load error")
try:
    tokenizer_keybert = AutoTokenizer.from_pretrained("amir22010/KeyBert_ABSA_Hospital_Multilingual_allenai_tk-instruct-base-def-pos_FinedTuned_Model")
    double_keybert_generator = AutoModelForSeq2SeqLM.from_pretrained("amir22010/KeyBert_ABSA_Hospital_Multilingual_allenai_tk-instruct-base-def-pos_FinedTuned_Model")
except Exception as e:
    print(e)
    print("keybert model load error")
def perform_asde_inference(text, dataset, model_id):
    if not text:
        if model_id == "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
            df = pd.read_csv('pyabsa_english.csv')  # validation dataset
        elif model_id == "PyABSA_Hospital_Multilingual_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
            df = pd.read_csv('pyabsa_multilingual.csv')  # validation dataset
        elif model_id == "KeyBert_ABSA_Hospital_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
            df = pd.read_csv('keybert_valid.csv')  # validation dataset
        random_i = np.random.randint(low=0, high=df.shape[0])
        selected_df = df.iloc[random_i]
        text = selected_df['clean_text']
        true_aspect = selected_df['actual_aspects']
        true_sentiment = selected_df['actual_sentiments']
        true_doubles = pd.DataFrame(list(map(list, zip(ast.literal_eval(true_aspect), ast.literal_eval(true_sentiment)))), columns=['Aspect', 'Sentiment'])
    else:
        true_doubles = pd.DataFrame([["NA", "NA"]], columns=['Aspect', 'Sentiment'])
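    # tk-instruct style prompt: a task definition plus two in-context examples,
    # followed by the input sentence and an "output:" cue for the model to complete.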
    bos_instruction = """Definition: The output will be the aspects (both implicit and explicit) and the aspects sentiment polarity. In cases where there are no aspects the output should be noaspectterm:none.
Positive example 1-
input: this hospital has a good team of doctors who will take care of all your needs brilliantly.
output: doctors:positive
Positive example 2-
input: Arthur as Irv at ham hospital ran an Nagar , Madurai has a doctor who engages you in a conversation and tries to take your mind off the pain and he has trained the staff to do so as well.
output: doctor:positive, staff:positive
Now complete the following example-
input: """
    delim_instruct = ''
    eos_instruct = ' \noutput:'
    if model_id == "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
        tokenized_text = tokenizer_english(bos_instruction + text + delim_instruct + eos_instruct, return_tensors="pt")
        output = double_english_generator.generate(tokenized_text.input_ids, max_length=512)
        model_generated = tokenizer_english.decode(output[0], skip_special_tokens=True)
    elif model_id == "PyABSA_Hospital_Multilingual_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
        tokenized_text = tokenizer_multilingual(bos_instruction + text + delim_instruct + eos_instruct, return_tensors="pt")
        output = double_multilingual_generator.generate(tokenized_text.input_ids, max_length=512)
        model_generated = tokenizer_multilingual.decode(output[0], skip_special_tokens=True)
    elif model_id == "KeyBert_ABSA_Hospital_allenai/tk-instruct-base-def-pos_FinedTuned_Model":
        tokenized_text = tokenizer_keybert(bos_instruction + text + delim_instruct + eos_instruct, return_tensors="pt")
        output = double_keybert_generator.generate(tokenized_text.input_ids, max_length=512)
        model_generated = tokenizer_keybert.decode(output[0], skip_special_tokens=True)
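    # The model emits comma-separated "aspect:sentiment" pairs; split them into
    # parallel lists and tabulate them as a two-column DataFrame.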
    pred_asp = [i.split(':')[0] for i in model_generated.split(',')]
    pred_sent = [i.split(':')[1] for i in model_generated.split(',')]
    pred_doubles = pd.DataFrame(list(map(list, zip(pred_asp, pred_sent))), columns=['Aspect', 'Sentiment'])
    return pred_doubles, true_doubles, text, model_generated
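
# Thin wrappers used as the Gradio click handler; exceptions are caught and
# printed so a failed inference does not crash the callback.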
def run_demo(text, dataset, model_id):
    try:
        return inference(text, dataset, model_id)
    except Exception as e:
        print(e)

def inference(text, dataset, model_id):
    return perform_asde_inference(text, dataset, model_id)
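
# Gradio UI: an input text box with dataset/model selectors, plus outputs for the raw
# generated string and the predicted vs. original aspect-sentiment tables.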
if __name__ == "__main__":
    demo = gr.Blocks()
    with demo:
        with gr.Row():
            with gr.Column():
                gr.Markdown(
                    "# <p align='center'>Hospital Review Aspect Sentiment Generation</p>"
                )
        with gr.Row():
            with gr.Column():
                asde_input_sentence = gr.Textbox(
                    placeholder="Leave this box blank and choose a dataset to get a random example...",
                    label="Example:",
                )
                gr.Markdown(
                    "You can find code and dataset at [MTech Thesis Project 2023](https://github.com/Amir22010/MTP_Thesis_Project_2023/tree/main)"
                )
                asde_dataset_ids = gr.Radio(
                    choices=[
                        "HospitalReviews"
                    ],
                    value="HospitalReviews",
                    label="Datasets",
                )
                asde_model_ids = gr.Radio(
                    choices=[
                        "PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                        "PyABSA_Hospital_Multilingual_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                        "KeyBert_ABSA_Hospital_allenai/tk-instruct-base-def-pos_FinedTuned_Model"
                    ],
                    value="PyABSA_Hospital_English_allenai/tk-instruct-base-def-pos_FinedTuned_Model",
                    label="Fine-tuned Models on Hospital Review custom data",
                )
                asde_inference_button = gr.Button("Let's go!")
                asde_output_text = gr.TextArea(label="Example:")
                asde_model_output_generated_sentence = gr.Textbox(
                    placeholder="Text Generated...",
                    label="Model Prediction Text Generated:",
                )
                asde_output_pred_df = gr.DataFrame(
                    label="Predicted Aspect & Sentiment:"
                )
                asde_output_true_df = gr.DataFrame(
                    label="Original Aspect & Sentiment:"
                )
                asde_inference_button.click(
                    fn=run_demo,
                    inputs=[
                        asde_input_sentence,
                        asde_dataset_ids,
                        asde_model_ids
                    ],
                    outputs=[
                        asde_output_pred_df,
                        asde_output_true_df,
                        asde_output_text,
                        asde_model_output_generated_sentence
                    ],
                )
        gr.Markdown(
            """### Author: [Amir Khan](https://github.com/Amir22010)
            """
        )
    demo.launch()