import gradio as gr
import torch
from transformers import pipeline

# 1. Choose a TTS model from Hugging Face
# You can later change this to another model, e.g. "suno/bark-small" if supported
TTS_MODEL_ID = "facebook/mms-tts-eng" # English TTS
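# Note: the MMS-TTS checkpoints come from Meta's Massively Multilingual Speech project;
# they are VITS-based and generate 16 kHz mono speech.
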
# 2. Create the TTS pipeline
device = 0 if torch.cuda.is_available() else -1
tts = pipeline("text-to-speech", model=TTS_MODEL_ID, device=device)
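# The model weights are downloaded from the Hugging Face Hub on the first run and
# cached locally; CPU inference (device=-1) works, just noticeably slower than GPU.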

def synthesize_tts(text):
    if not text or text.strip() == "":
        raise gr.Error("Please enter some text to synthesize.")

    # 3. Run the pipeline
    out = tts(text)

    # out["audio"] is a numpy array; out["sampling_rate"] is the sample rate.
    # The array may carry a leading batch dimension (e.g. shape (1, num_samples)),
    # so squeeze it to a 1-D mono waveform for gr.Audio's (sample_rate, data) tuple.
    audio = (out["sampling_rate"], out["audio"].squeeze())
    return audio

title = "Simple Text-to-Speech (TTS) Space"
description = (
"Enter some English text and generate speech using a Hugging Face TTS model. "
"You can later replace the model with F5-TTS for voice cloning."
)

with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)

    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(
                lines=4,
                label="Text to synthesize",
                placeholder="Type some English text here...",
            )
            btn = gr.Button("Generate Speech")
        with gr.Column():
            audio_out = gr.Audio(label="Generated audio", type="numpy")

    btn.click(fn=synthesize_tts, inputs=text_in, outputs=audio_out)

if __name__ == "__main__":
    demo.launch()