Borio047 committed on
Commit
367613d
·
verified ·
1 Parent(s): c7981f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -15
app.py CHANGED
@@ -3,33 +3,53 @@ import torch
3
  from transformers import pipeline
4
 
5
  # 1. Choose a TTS model from Hugging Face
6
- # You can later change this to another model, e.g. "suno/bark-small" if supported
7
- TTS_MODEL_ID = "facebook/mms-tts-eng" # English TTS
8
 
9
  # 2. Create the TTS pipeline
10
  device = 0 if torch.cuda.is_available() else -1
11
- tts = pipeline("text-to-speech", model=TTS_MODEL_ID, device=device)
12
 
13
- def synthesize_tts(text):
 
 
 
 
 
 
 
 
 
 
14
  if not text or text.strip() == "":
15
  raise gr.Error("Please enter some text to synthesize.")
16
-
17
- # 3. Run the pipeline
18
- out = tts(text)
19
- # out["audio"] is a numpy array; out["sampling_rate"] is the sample rate
20
- audio = (out["sampling_rate"], out["audio"])
21
- return audio
 
 
 
 
 
 
 
 
 
 
22
 
23
  title = "Simple Text-to-Speech (TTS) Space"
24
  description = (
25
  "Enter some English text and generate speech using a Hugging Face TTS model. "
26
- "You can later replace the model with F5-TTS for voice cloning."
27
  )
28
 
29
  with gr.Blocks() as demo:
30
  gr.Markdown(f"# {title}")
31
  gr.Markdown(description)
32
-
33
  with gr.Row():
34
  with gr.Column():
35
  text_in = gr.Textbox(
@@ -39,9 +59,10 @@ with gr.Blocks() as demo:
39
  )
40
  btn = gr.Button("Generate Speech")
41
  with gr.Column():
 
42
  audio_out = gr.Audio(label="Generated audio", type="numpy")
43
-
44
  btn.click(fn=synthesize_tts, inputs=text_in, outputs=audio_out)
45
 
46
- if __name__ == "__main__":
47
- demo.launch()
 
3
  from transformers import pipeline
4
 
5
  # 1. Choose a TTS model from Hugging Face
6
+ # This model is for English TTS. You can later swap it for another.
7
+ TTS_MODEL_ID = "facebook/mms-tts-eng"
8
 
9
  # 2. Create the TTS pipeline
10
  device = 0 if torch.cuda.is_available() else -1
11
+ print(f"Using device: {'cuda' if device == 0 else 'cpu'}")
12
 
13
+ try:
14
+ tts = pipeline("text-to-speech", model=TTS_MODEL_ID, device=device)
15
+ except Exception as e:
16
+ # If the model can't be loaded, fail early with a clear message
17
+ raise RuntimeError(f"Failed to load TTS pipeline: {e}")
18
+
19
+
20
+ def synthesize_tts(text: str):
21
+ """
22
+ Take text and return (sampling_rate, audio_numpy) for Gradio Audio output.
23
+ """
24
  if not text or text.strip() == "":
25
  raise gr.Error("Please enter some text to synthesize.")
26
+
27
+ try:
28
+ out = tts(text)
29
+ except Exception as e:
30
+ # Show any HF pipeline error nicely in the UI
31
+ raise gr.Error(f"TTS pipeline error: {e}")
32
+
33
+ # Expecting a dict with 'audio' (numpy array) and 'sampling_rate' (int)
34
+ if not isinstance(out, dict) or "audio" not in out or "sampling_rate" not in out:
35
+ raise gr.Error(f"Unexpected TTS output format: {out}")
36
+
37
+ audio = out["audio"]
38
+ sr = out["sampling_rate"]
39
+
40
+ return (sr, audio)
41
+
42
 
43
  title = "Simple Text-to-Speech (TTS) Space"
44
  description = (
45
  "Enter some English text and generate speech using a Hugging Face TTS model. "
46
+ "Once this works, we can upgrade it to voice cloning (F5-TTS style)."
47
  )
48
 
49
  with gr.Blocks() as demo:
50
  gr.Markdown(f"# {title}")
51
  gr.Markdown(description)
52
+
53
  with gr.Row():
54
  with gr.Column():
55
  text_in = gr.Textbox(
 
59
  )
60
  btn = gr.Button("Generate Speech")
61
  with gr.Column():
62
+ # type='numpy' means we can return (sr, numpy_array)
63
  audio_out = gr.Audio(label="Generated audio", type="numpy")
64
+
65
  btn.click(fn=synthesize_tts, inputs=text_in, outputs=audio_out)
66
 
67
+ # On Spaces it's fine to launch unconditionally; disable SSR to avoid async quirks
68
+ demo.launch(ssr_mode=False)