Spaces:

Borio047
/

DG-TTS

Sleeping

App Files Community

Borio047 committed 24 days ago

Commit

367613d

verified ·

1 Parent(s): c7981f5

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -15

app.py CHANGED Viewed

@@ -3,33 +3,53 @@ import torch
 from transformers import pipeline
 # 1. Choose a TTS model from Hugging Face
-# You can later change this to another model, e.g. "suno/bark-small" if supported
-TTS_MODEL_ID = "facebook/mms-tts-eng"  # English TTS
 # 2. Create the TTS pipeline
 device = 0 if torch.cuda.is_available() else -1
-tts = pipeline("text-to-speech", model=TTS_MODEL_ID, device=device)
-def synthesize_tts(text):
     if not text or text.strip() == "":
         raise gr.Error("Please enter some text to synthesize.")
-    # 3. Run the pipeline
-    out = tts(text)
-    # out["audio"] is a numpy array; out["sampling_rate"] is the sample rate
-    audio = (out["sampling_rate"], out["audio"])
-    return audio
 title = "Simple Text-to-Speech (TTS) Space"
 description = (
     "Enter some English text and generate speech using a Hugging Face TTS model. "
-    "You can later replace the model with F5-TTS for voice cloning."
 )
 with gr.Blocks() as demo:
     gr.Markdown(f"# {title}")
     gr.Markdown(description)
     with gr.Row():
         with gr.Column():
             text_in = gr.Textbox(
@@ -39,9 +59,10 @@ with gr.Blocks() as demo:
             )
             btn = gr.Button("Generate Speech")
         with gr.Column():
             audio_out = gr.Audio(label="Generated audio", type="numpy")
     btn.click(fn=synthesize_tts, inputs=text_in, outputs=audio_out)
-if __name__ == "__main__":
-    demo.launch()

 from transformers import pipeline
 # 1. Choose a TTS model from Hugging Face
+# This model is for English TTS. You can later swap it for another.
+TTS_MODEL_ID = "facebook/mms-tts-eng"
 # 2. Create the TTS pipeline
 device = 0 if torch.cuda.is_available() else -1
+print(f"Using device: {'cuda' if device == 0 else 'cpu'}")
+try:
+    tts = pipeline("text-to-speech", model=TTS_MODEL_ID, device=device)
+except Exception as e:
+    # If the model can't be loaded, fail early with a clear message
+    raise RuntimeError(f"Failed to load TTS pipeline: {e}")
+def synthesize_tts(text: str):
+    """
+    Take text and return (sampling_rate, audio_numpy) for Gradio Audio output.
+    """
     if not text or text.strip() == "":
         raise gr.Error("Please enter some text to synthesize.")
+    try:
+        out = tts(text)
+    except Exception as e:
+        # Show any HF pipeline error nicely in the UI
+        raise gr.Error(f"TTS pipeline error: {e}")
+    # Expecting a dict with 'audio' (numpy array) and 'sampling_rate' (int)
+    if not isinstance(out, dict) or "audio" not in out or "sampling_rate" not in out:
+        raise gr.Error(f"Unexpected TTS output format: {out}")
+    audio = out["audio"]
+    sr = out["sampling_rate"]
+    return (sr, audio)
 title = "Simple Text-to-Speech (TTS) Space"
 description = (
     "Enter some English text and generate speech using a Hugging Face TTS model. "
+    "Once this works, we can upgrade it to voice cloning (F5-TTS style)."
 )
 with gr.Blocks() as demo:
     gr.Markdown(f"# {title}")
     gr.Markdown(description)
     with gr.Row():
         with gr.Column():
             text_in = gr.Textbox(
             )
             btn = gr.Button("Generate Speech")
         with gr.Column():
+            # type='numpy' means we can return (sr, numpy_array)
             audio_out = gr.Audio(label="Generated audio", type="numpy")
     btn.click(fn=synthesize_tts, inputs=text_in, outputs=audio_out)
+# On Spaces it's fine to launch unconditionally; disable SSR to avoid async quirks
+demo.launch(ssr_mode=False)