|
|
import gradio as gr |
|
|
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer |
|
|
|
|
|
|
|
|
# Checkpoint shared by the syllables-to-text model and its tokenizer, so the
# two can never be loaded from different repositories by accident.
_SYLLABLES_TO_TEXT_CHECKPOINT = "IbrahimSalah/Arabic_Syllables_to_text_Converter_Using_MT5"

# Stage 1: speech -> syllable string (automatic speech recognition pipeline).
Arabic_speech_Syllables_recognition = pipeline(
    task="automatic-speech-recognition",
    model="IbrahimSalah/Arabic_speech_Syllables_recognition_Using_Wav2vec2",
)

# Stage 2: syllable string -> text (sequence-to-sequence model + tokenizer).
Arabic_Syllables_to_text_tokenizer = AutoTokenizer.from_pretrained(
    _SYLLABLES_TO_TEXT_CHECKPOINT
)
Arabic_Syllables_to_text_model = AutoModelForSeq2SeqLM.from_pretrained(
    _SYLLABLES_TO_TEXT_CHECKPOINT
)

# Put the seq2seq model in evaluation mode before it serves any request.
Arabic_Syllables_to_text_model.eval()
|
|
|
|
|
def _format_syllables_for_model(syllables):
    """Wrap a space-separated syllable string in the model's input format.

    Every space becomes ``|``, a leading ``|`` is prepended and a terminating
    ``.`` is appended, e.g. ``"a b"`` -> ``"|a|b|."``... minus the trailing
    bar: the exact result for ``"a b"`` is ``"|a|b."``.
    """
    return "|" + syllables.replace(" ", "|") + "."


def get_and_process_syllabels(audio):
    """Transcribe an audio clip to text via an intermediate syllable string.

    The clip is first run through the speech-to-syllables recognizer; the
    resulting syllables are reformatted and passed to the syllables-to-text
    seq2seq model, whose decoded output is returned.

    Args:
        audio: The audio input accepted by the speech recognition pipeline
            (the Gradio interface in this file supplies a file path), or
            ``None`` when no audio was provided.

    Returns:
        The decoded text up to (not including) the first ``"."``. An empty
        string is returned when there is no audio or nothing was recognized.
    """
    # The UI callback can be invoked without any recording/upload; there is
    # nothing to transcribe in that case, so avoid crashing the pipeline.
    if audio is None:
        return ""

    clip = Arabic_speech_Syllables_recognition(audio)["text"]

    # Silence / unintelligible audio yields no syllables; feeding the bare
    # "|." wrapper to the seq2seq model would only produce made-up text.
    if not clip.strip():
        return ""

    preprocessed_input = _format_syllables_for_model(clip)

    input_ids = Arabic_Syllables_to_text_tokenizer.encode(
        preprocessed_input,
        return_tensors="pt",
    )

    # NOTE(review): early_stopping is a beam-search option; no num_beams is
    # set here, so confirm whether beam search was intended.
    output_ids = Arabic_Syllables_to_text_model.generate(
        input_ids,
        max_length=100,
        early_stopping=True,
        pad_token_id=Arabic_Syllables_to_text_tokenizer.pad_token_id,
        bos_token_id=Arabic_Syllables_to_text_tokenizer.bos_token_id,
        eos_token_id=Arabic_Syllables_to_text_tokenizer.eos_token_id,
    )

    # The first generated id is dropped before decoding (presumably the
    # decoder start token -- TODO confirm); special tokens are stripped.
    output_text = Arabic_Syllables_to_text_tokenizer.decode(
        output_ids[0][1:],
        skip_special_tokens=True,
    )

    # Keep only the text before the first "." (the input terminator).
    return output_text.split(".")[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Audio input widget: the user may record from the microphone or upload a
# file; the callback receives the clip as a file path.
audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

# Single-function web UI: audio in, transcribed text out.
demo = gr.Interface(
    fn=get_and_process_syllabels,
    inputs=audio_input,
    outputs="text",
    title="Audio Transcription",
    description="Speak or upload an audio file to see the transcribed text with diacritics.",
)

# Start the app; share=True additionally requests a public share link.
demo.launch(share=True)