# Spaces: Sleeping
| import time, os, shutil, subprocess, tempfile | |
| import numpy as np | |
| import gradio as gr | |
| import soundfile as sf | |
| import torch | |
| from speechbrain.inference.TTS import Tacotron2 | |
| from speechbrain.inference.vocoders import HIFIGAN | |
| from speechbrain.utils.fetching import LocalStrategy | |
# Sample rate (Hz) passed to the WAV writer.
SAMPLE_RATE = 22050


def _load_pretrained(model_cls, source):
    """Call ``model_cls.from_hparams`` for *source*.

    ``savedir`` is ``pretrained/<last path component of source>`` and the
    fetch strategy is ``LocalStrategy.COPY``.
    """
    repo_name = source.split("/")[-1]
    return model_cls.from_hparams(
        source=source,
        savedir=f"pretrained/{repo_name}",
        local_strategy=LocalStrategy.COPY,
    )


# ---- Load models once (on Space startup) ----
taco = _load_pretrained(Tacotron2, "Sunbird/tts-tacotron2-lug")
vocoder = _load_pretrained(HIFIGAN, "speechbrain/tts-hifigan-ljspeech")
def _ensure_mel_shape(mel, n_mels=80):
    """Return *mel* laid out as ``[B, n_mels, T]``.

    Args:
        mel: A tensor, or a tuple/list whose first element is the tensor
            (only that first element is used).
        n_mels: Number of mel channels used to recognise a
            ``[B, T, n_mels]`` layout. Defaults to 80.

    Returns:
        The tensor with its last two axes swapped when it is 3-D, its last
        axis has ``n_mels`` entries and its middle axis does not. Any other
        input is returned unchanged.
    """
    if isinstance(mel, (tuple, list)):
        mel = mel[0]
    # Only transpose when the layout is unambiguous: a [B, n_mels, n_mels]
    # tensor is assumed to already be channel-first.
    if mel.dim() == 3 and mel.shape[1] != n_mels and mel.shape[2] == n_mels:
        mel = mel.transpose(1, 2)
    return mel
def _have_ffmpeg():
    """Report whether ``shutil.which`` can locate an ``ffmpeg`` executable."""
    ffmpeg_exe = shutil.which("ffmpeg")
    return ffmpeg_exe is not None
def _save_wav_np(path, wav_tensor):
    """Write *wav_tensor* to *path* as a 16-bit PCM WAV at ``SAMPLE_RATE``.

    The tensor is detached, moved to the CPU and converted to a float32
    NumPy array; soundfile does the writing, so no torchaudio backend is
    needed. Assumes mono samples in [-1, 1] - the values are not checked
    here.
    """
    samples = wav_tensor.detach().cpu().numpy()
    samples = samples.astype(np.float32)
    sf.write(path, samples, samplerate=SAMPLE_RATE, subtype="PCM_16")
def tts_luganda(text):
    """Synthesize speech for *text* and write it to temporary audio files.

    Args:
        text: Text to speak. ``None`` and whitespace-only input are treated
            as empty.

    Returns:
        A ``(wav_path, mp3_path, status)`` tuple. ``wav_path`` is the path
        of the generated WAV file; ``mp3_path`` is the path of the MP3
        transcode, or ``None`` when ffmpeg is not available or the
        transcode failed; ``status`` is a short message for the UI. For
        empty input the result is
        ``(None, None, "Please enter Luganda text.")`` and nothing is
        synthesized.
    """
    text = (text or "").strip()
    if not text:
        return None, None, "Please enter Luganda text."

    # Synthesize: text -> mel spectrogram -> waveform.
    mel = _ensure_mel_shape(taco.encode_text(text))
    wav = vocoder.decode_batch(mel)[0].squeeze(0)  # 1D torch tensor

    # mkstemp yields a unique file name, so requests arriving within the
    # same second can no longer overwrite each other's output (a
    # timestamp-based name could collide).
    fd, wav_path = tempfile.mkstemp(prefix="luganda_tts_", suffix=".wav")
    os.close(fd)  # the WAV writer reopens the file by path
    _save_wav_np(wav_path, wav)

    # Optional MP3 via ffmpeg (best effort: a failure only drops the MP3).
    mp3_path = None
    if _have_ffmpeg():
        candidate = os.path.splitext(wav_path)[0] + ".mp3"
        try:
            subprocess.run(
                [
                    "ffmpeg", "-y", "-i", wav_path,
                    "-codec:a", "libmp3lame", "-q:a", "2",
                    candidate,
                ],
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except (subprocess.SubprocessError, OSError):
            # Do not leave a truncated MP3 behind; it may not exist at all.
            try:
                os.remove(candidate)
            except OSError:
                pass
        else:
            mp3_path = candidate

    # NOTE(review): the prefix was a mis-encoded character ("β") in the
    # reviewed copy; restored as a check mark - confirm against the original.
    status = "✅ Done."
    status += " (WAV + MP3 ready)" if mp3_path else " (WAV ready)"
    return wav_path, mp3_path, status
# UI layout: text box and button in, audio player / MP3 download / status out.
# NOTE(review): the first character of the heading looks mis-encoded
# (probably an emoji originally) - confirm against the original file.
with gr.Blocks(title="Luganda TTS") as demo:
    gr.Markdown(
        "# π Luganda Text-to-Speech\n"
        "Type Luganda, click **Generate**, and listen/download the audio."
    )
    text = gr.Textbox(
        label="Luganda text",
        lines=6,
        value="Ngenda mu kibuga Kampala olunaku lwa leero.",
    )
    btn = gr.Button("Generate", variant="primary")
    out_wav = gr.Audio(label="WAV (22.05 kHz)", type="filepath")
    out_mp3 = gr.File(label="Download MP3", interactive=False)
    status = gr.Markdown("Ready.")

    # The three return values of tts_luganda map onto the three outputs.
    btn.click(
        fn=tts_luganda,
        inputs=text,
        outputs=[out_wav, out_mp3, status],
    )

# Just enable queue with defaults (no unsupported args), then start the app.
demo.queue()
demo.launch()