# ramybenaroya's picture
# Update app.py
# 910eaed verified
import gradio as gr
import torch
from transformers import pipeline
import spaces
# Build the speech-to-text pipeline once, at import time, so that every call
# to the transcription callback reuses the same loaded model.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="ivrit-ai/whisper-large-v3",
    device="cuda",  # run on the GPU
    torch_dtype=torch.float16,  # half-precision weights
)
@spaces.GPU
def transcribe(audio):
    """Return the Hebrew transcript for a submitted recording.

    Args:
        audio: The value delivered by the audio input component; it is
            forwarded unchanged to the module-level ``pipe``. ``None``
            means nothing was uploaded or recorded.

    Returns:
        The ``'text'`` entry of the pipeline result, or a short prompt
        asking for audio when ``audio`` is ``None``.
    """
    if audio is not None:
        # Pin the generation language to Hebrew for the pipeline call.
        hebrew_options = {"language": "hebrew"}
        return pipe(audio, generate_kwargs=hebrew_options)['text']
    return "Please upload or record an audio file."
# Audio input: accepts a microphone recording or an uploaded file and hands
# the callback a file path.
hebrew_audio_input = gr.Audio(
    sources=["microphone", "upload"],
    type="filepath",
    label="Hebrew Audio",
)

# Wire the audio input to `transcribe` and show the result as plain text.
demo = gr.Interface(
    title="Hebrew Speech-to-Text Transcription",
    description=(
        "Use this app to transcribe Hebrew speech to text. "
        "Upload your own audio file or record audio directly through your microphone. "
        "The model used is `ivrit-ai/whisper-large-v3`."
    ),
    fn=transcribe,
    inputs=hebrew_audio_input,
    outputs="text",
    allow_flagging="never",  # flagging is turned off
)

# Start serving the interface.
demo.launch()