|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import pipeline |
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face Hub checkpoint loaded by the text-to-speech pipeline below.
TTS_MODEL_ID = "facebook/mms-tts-eng"

# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Built once at import time so every request reuses the same pipeline object.
tts = pipeline(task="text-to-speech", model=TTS_MODEL_ID, device=device)
|
|
|
|
|
def synthesize_tts(text):
    """Synthesize speech for ``text`` and return it as a Gradio audio value.

    Args:
        text: The text to speak. ``None``, an empty string, or a string
            containing only whitespace is rejected.

    Returns:
        A ``(sampling_rate, waveform)`` tuple. ``waveform`` is the pipeline's
        audio rearranged to ``(samples,)`` for a single channel or
        ``(samples, channels)`` otherwise.

    Raises:
        gr.Error: If there is no text to synthesize.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    out = tts(text)
    waveform = out["audio"]

    # The text-to-speech pipeline documents its audio as channels-first,
    # (nb_channels, audio_length), whereas Gradio's numpy audio format is
    # (samples,) or (samples, channels). Passing a (1, N) array through
    # unchanged would be read as one sample with N channels.
    if getattr(waveform, "ndim", 1) == 2:
        # Channels-first is detected by the first axis being the shorter one
        # (assumes far fewer channels than samples).
        if waveform.shape[0] < waveform.shape[1]:
            waveform = waveform.T
        # Drop a singleton channel axis so mono audio is a flat 1-D array.
        if waveform.shape[1] == 1:
            waveform = waveform[:, 0]

    return out["sampling_rate"], waveform
|
|
|
|
|
# Heading text for the page.
title = "Simple Text-to-Speech (TTS) Space"

# Introductory blurb; the two sentences are joined with a single space.
description = " ".join(
    (
        "Enter some English text and generate speech using a Hugging Face TTS model.",
        "You can later replace the model with F5-TTS for voice cloning.",
    )
)
|
|
|
|
|
# Page layout: heading and description first, then a row with two columns
# (text input plus button in one, audio output in the other).
with gr.Blocks() as demo:
    gr.Markdown("# " + title)
    gr.Markdown(description)

    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(
                label="Text to synthesize",
                placeholder="Type some English text here...",
                lines=4,
            )
            btn = gr.Button("Generate Speech")
        with gr.Column():
            audio_out = gr.Audio(type="numpy", label="Generated audio")

    # Clicking the button passes the textbox value to synthesize_tts and
    # routes its return value to the audio component.
    btn.click(synthesize_tts, inputs=text_in, outputs=audio_out)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|