# app.py -- Hugging Face Space by gh-kaka22 (commit 2dbf27c, verified)
import gradio as gr
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
# get models
# ASR pipeline: wav2vec2 model that transcribes Arabic audio into a syllable sequence
# (space-separated syllables rather than whole words) -- see the model card for details.
Arabic_speech_Syllables_recognition = pipeline("automatic-speech-recognition", model="IbrahimSalah/Arabic_speech_Syllables_recognition_Using_Wav2vec2")
# MT5 seq2seq model + tokenizer that converts the syllable sequence into regular Arabic text.
Arabic_Syllables_to_text_model = AutoModelForSeq2SeqLM.from_pretrained("IbrahimSalah/Arabic_Syllables_to_text_Converter_Using_MT5")
Arabic_Syllables_to_text_tokenizer = AutoTokenizer.from_pretrained("IbrahimSalah/Arabic_Syllables_to_text_Converter_Using_MT5")
# Inference only: disable dropout etc. (no gradients are needed in this app).
Arabic_Syllables_to_text_model.eval()
def get_and_process_syllabels(audio, max_length=100):
    """Transcribe an Arabic audio clip to text via a syllable intermediate.

    Two-stage pipeline: the wav2vec2 ASR model emits a space-separated
    syllable sequence, which is reformatted and passed to the MT5 seq2seq
    converter that produces regular Arabic text.

    Args:
        audio: Path to the audio file (the Gradio widget uses type="filepath").
        max_length: Maximum number of tokens the MT5 model may generate
            (default 100, matching the original hard-coded limit).

    Returns:
        The converted text, truncated at the first "." sentence terminator.
    """
    syllables = Arabic_speech_Syllables_recognition(audio)['text']
    # The MT5 converter expects "|"-delimited syllables terminated by ".".
    preprocessed_input = "|" + syllables.replace(" ", "|") + "."
    input_ids = Arabic_Syllables_to_text_tokenizer.encode(
        preprocessed_input, return_tensors="pt"
    )
    output_ids = Arabic_Syllables_to_text_model.generate(
        input_ids,
        max_length=max_length,
        # NOTE(review): early_stopping only affects beam search; with the
        # default num_beams=1 it is a no-op -- confirm whether beam search
        # was intended here.
        early_stopping=True,
        pad_token_id=Arabic_Syllables_to_text_tokenizer.pad_token_id,
        bos_token_id=Arabic_Syllables_to_text_tokenizer.bos_token_id,
        eos_token_id=Arabic_Syllables_to_text_tokenizer.eos_token_id,
    )
    # Skip the leading decoder-start token, then cut at the first ".".
    output_text = Arabic_Syllables_to_text_tokenizer.decode(
        output_ids[0][1:], skip_special_tokens=True
    )
    return output_text.split(".")[0]
# Wire up the Gradio UI: a single audio input (microphone or file upload,
# delivered to the callback as a file path) mapped to a text output.
audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

demo = gr.Interface(
    fn=get_and_process_syllabels,
    inputs=audio_input,
    outputs="text",
    title="Audio Transcription",
    description="Speak or upload an audio file to see the transcribed text with diacritics.",
)

# Serve the app with a public share link.
demo.launch(share=True)