Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
| import torch | |
def _load_pair(checkpoint_dir):
    """Load the classifier and its tokenizer from the same local directory.

    Returns a ``(model, tokenizer)`` tuple; the model is loaded first, then
    the tokenizer.
    """
    classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)
    tok = AutoTokenizer.from_pretrained(checkpoint_dir)
    return classifier, tok


# Load the saved models and tokenizers
model_roberta, tokenizer_roberta = _load_pair("./models/roberta-base-offensive")
model_distilbert, tokenizer_distilbert = _load_pair(
    "./models/distilbert-base-uncased-offensive"
)
model_deberta, tokenizer_deberta = _load_pair("./models/deberta-offensive")
model_bert, tokenizer_bert = _load_pair("./models/bert-offensive")

# Arabic saved models and tokenizers
model_arbert, tokenizer_arbert = _load_pair("./models/UBC-NLP/ARBERT")
model_marbert, tokenizer_marbert = _load_pair("./models/UBC-NLP/MARBERT")
def _select_model(model_choice):
    """Return the ``(model, tokenizer)`` pair for *model_choice*, or ``None``.

    The module-level models are referenced lazily, one branch per choice, so
    an unknown choice never touches any of them.
    """
    if model_choice == "RoBERTa":
        return model_roberta, tokenizer_roberta
    if model_choice == "DistilBERT":
        return model_distilbert, tokenizer_distilbert
    if model_choice == "ARBERT":
        return model_arbert, tokenizer_arbert
    if model_choice == "MARBERT":
        return model_marbert, tokenizer_marbert
    if model_choice == "DeBERTa":
        return model_deberta, tokenizer_deberta
    if model_choice == "BERT":
        return model_bert, tokenizer_bert
    return None


def predict(tweet, model_choice):
    """Classify *tweet* as offensive or not with the chosen model.

    Args:
        tweet: The text to classify.
        model_choice: One of "RoBERTa", "DistilBERT", "ARBERT", "MARBERT",
            "DeBERTa" or "BERT".

    Returns:
        A ``(prediction, confidence)`` pair of strings. When the model choice
        is not recognised, or the text is empty, the pair carries a short
        message for the user instead of a prediction.
    """
    selected = _select_model(model_choice)
    if selected is None:
        return "Model not selected", "Please select a model."

    # Classifying an empty string only yields a meaningless score.
    if tweet is None or not tweet.strip():
        return "No text provided", "Please enter some text."

    model, tokenizer = selected

    # Call the tokenizer itself (not ``encode``) so the attention mask and any
    # other model inputs are produced and forwarded along with the token ids.
    inputs = tokenizer(tweet, return_tensors="pt", truncation=True, max_length=512)

    with torch.no_grad():
        logits = model(**inputs).logits

    # A single text was encoded, so row 0 holds its class probabilities.
    probabilities = torch.softmax(logits, dim=-1)[0]
    prediction_index = int(probabilities.argmax(dim=-1).item())

    prediction_map = {0: "Not Offensive", 1: "Offensive"}
    # Fall back to a generic label instead of raising KeyError if a checkpoint
    # exposes more than two classes.
    prediction = prediction_map.get(prediction_index, f"Label {prediction_index}")
    confidence = probabilities[prediction_index].item()

    return prediction, f"Confidence: {confidence:.4f}"
def app_interface():
    """Build and return the Gradio Blocks UI for offensive-language detection.

    The UI shows instructions, a language radio, a model dropdown, a text
    input, a "Predict" button and two output boxes. Clicking the button calls
    ``predict`` with the text and the selected model.
    """
    # NOTE(review): the original indentation was lost; the row grouping below
    # (button under the text row, both outputs sharing one row) is assumed.
    instructions = (
        "1. Select the language of the text.\n"
        "2. Choose a model corresponding to the selected language:\n"
        " - For **English**: BERT, DeBERTa, RoBERTa, or DistilBERT\n"
        " - For **Tunisian Arabic**: ARBERT or MARBERT"
    )
    language_options = ["English", "Tunisian Arabic"]
    model_options = ["RoBERTa", "DistilBERT", "ARBERT", "MARBERT", "DeBERTa", "BERT"]

    with gr.Blocks() as demo:
        gr.Markdown("## Offensive Language Detection")
        gr.Markdown("### Instructions:")
        gr.Markdown(instructions)

        with gr.Row():
            # Display-only: the language selection is not passed to predict.
            gr.Radio(language_options, label="Choose Language")

        with gr.Row():
            model_dropdown = gr.Dropdown(model_options, label="Choose Model")

        with gr.Row():
            text_input = gr.Textbox(
                lines=4,
                placeholder="Enter your text here...",
                label="Text",
            )

        predict_button = gr.Button("Predict")

        with gr.Row():
            prediction_box = gr.Textbox(label="Prediction")
            confidence_box = gr.Textbox(label="Confidence")

        predict_button.click(
            fn=predict,
            inputs=[text_input, model_dropdown],
            outputs=[prediction_box, confidence_box],
        )

    return demo
# Build the interface at import time so ``app`` stays a module-level attribute.
app = app_interface()

if __name__ == "__main__":
    # Start the web server only when this file is run as a script, so that
    # importing the module does not launch a server as a side effect.
    app.launch()