Spaces:

damla921
/

Luganda_TTS

Sleeping

App Files Files Community

Luganda_TTS / app.py

damla921

Update app.py

61aea24 verified 3 months ago

raw

history blame contribute delete

2.96 kB

	import time, os, shutil, subprocess, tempfile
	import numpy as np
	import gradio as gr
	import soundfile as sf
	import torch
	from speechbrain.inference.TTS import Tacotron2
	from speechbrain.inference.vocoders import HIFIGAN
	from speechbrain.utils.fetching import LocalStrategy

	# Sample rate in Hz handed to soundfile when the synthesized WAV is written.
	SAMPLE_RATE = 22050

	# ---- Load models once (on Space startup) ----
	# Text -> mel-spectrogram model, fetched from the "Sunbird/tts-tacotron2-lug"
	# source into the local "pretrained/tts-tacotron2-lug" directory.
	# NOTE(review): LocalStrategy.COPY presumably copies fetched files into
	# savedir instead of symlinking them -- confirm against the SpeechBrain docs.
	taco = Tacotron2.from_hparams(
	    source="Sunbird/tts-tacotron2-lug",
	    savedir="pretrained/tts-tacotron2-lug",
	    local_strategy=LocalStrategy.COPY,
	)
	# Vocoder that turns the mel-spectrogram into a waveform.
	# NOTE(review): this is the LJSpeech HiFi-GAN checkpoint paired with a Luganda
	# Tacotron2 -- assumes both use the same mel configuration; TODO confirm.
	vocoder = HIFIGAN.from_hparams(
	    source="speechbrain/tts-hifigan-ljspeech",
	    savedir="pretrained/tts-hifigan-ljspeech",
	    local_strategy=LocalStrategy.COPY,
	)

	def _ensure_mel_shape(mel, n_mels=80):
	    """Return *mel* laid out as ``[B, n_mels, T]``.

	    Args:
	        mel: A mel-spectrogram tensor, or a tuple/list whose first element is
	            that tensor (only the first element is used).
	        n_mels: Number of mel channels used to recognise the channel axis.
	            Defaults to 80, the value this helper previously hard-coded.

	    Returns:
	        The tensor with its last two axes swapped when it is 3-D and only the
	        last axis has size ``n_mels`` (i.e. it looks like ``[B, T, n_mels]``);
	        otherwise the tensor is returned unchanged.
	    """
	    if isinstance(mel, (tuple, list)):
	        mel = mel[0]
	    # Only transpose when the layout is unambiguous: if axis 1 already has
	    # n_mels entries the tensor is treated as being in the target layout.
	    channels_last = (
	        mel.dim() == 3 and mel.shape[1] != n_mels and mel.shape[2] == n_mels
	    )
	    return mel.transpose(1, 2) if channels_last else mel

	def _have_ffmpeg():
	    """Report whether an ``ffmpeg`` executable can be located via ``shutil.which``."""
	    ffmpeg_exe = shutil.which("ffmpeg")
	    return ffmpeg_exe is not None

	def _save_wav_np(path, wav_tensor):
	    """Write *wav_tensor* to *path* as a 16-bit PCM WAV at ``SAMPLE_RATE``.

	    The tensor is detached, moved to the CPU and converted to a float32 NumPy
	    array, then written with soundfile (so no torchaudio backend is needed).
	    """
	    host_tensor = wav_tensor.detach().cpu()
	    samples = host_tensor.numpy().astype(np.float32)
	    sf.write(path, samples, samplerate=SAMPLE_RATE, subtype="PCM_16")

	def _encode_mp3(wav_path):
	    """Transcode *wav_path* to a sibling ``.mp3`` with ffmpeg; return its path or ``None`` on failure."""
	    mp3_path = os.path.splitext(wav_path)[0] + ".mp3"
	    try:
	        subprocess.run(
	            ["ffmpeg", "-y", "-i", wav_path, "-codec:a", "libmp3lame", "-q:a", "2", mp3_path],
	            check=True,
	            stdout=subprocess.DEVNULL,
	            stderr=subprocess.DEVNULL,
	        )
	    except (subprocess.CalledProcessError, OSError):
	        # MP3 is a best-effort extra: a failed or unlaunchable ffmpeg must not
	        # fail the request, but a partially written file should not linger.
	        try:
	            os.remove(mp3_path)
	        except OSError:
	            pass  # nothing was written, or it cannot be removed; ignore
	        return None
	    return mp3_path

	def tts_luganda(text):
	    """Synthesize speech for *text* and write it to temporary audio files.

	    Args:
	        text: Text entered in the UI; ``None`` is treated as empty and
	            surrounding whitespace is ignored.

	    Returns:
	        A ``(wav_path, mp3_path, status)`` tuple. ``wav_path`` is the WAV file
	        that was written; ``mp3_path`` is the MP3 file, or ``None`` when
	        ffmpeg is not available or the conversion failed; ``status`` is a
	        message for the UI. For blank input both paths are ``None`` and the
	        status asks the user to enter text.
	    """
	    text = (text or "").strip()
	    if not text:
	        return None, None, "Please enter Luganda text."

	    # Synthesize: text -> mel -> waveform.
	    mel = _ensure_mel_shape(taco.encode_text(text))
	    # Take the first batch item and drop its leading axis (expected to leave
	    # a 1-D waveform tensor).
	    wav = vocoder.decode_batch(mel)[0].squeeze(0)

	    # mkstemp hands out a unique name, so requests that land in the same
	    # second can no longer overwrite each other's output (the previous
	    # timestamp-based name could collide).
	    fd, wav_path = tempfile.mkstemp(prefix="luganda_tts_", suffix=".wav")
	    os.close(fd)  # soundfile reopens the file by path
	    _save_wav_np(wav_path, wav)

	    # Optional MP3 via ffmpeg.
	    mp3_path = _encode_mp3(wav_path) if _have_ffmpeg() else None

	    status = "✅ Done."
	    status += " (WAV + MP3 ready)" if mp3_path else " (WAV ready)"
	    return wav_path, mp3_path, status

	# Build the UI. Components are created in the order they should appear, and
	# the click handler is registered while the Blocks context is still open.
	with gr.Blocks(title="Luganda TTS") as demo:
	    gr.Markdown("# 🌍 Luganda Text-to-Speech\nType Luganda, click Generate, and listen/download the audio.")
	    text = gr.Textbox(label="Luganda text", lines=6, value="Ngenda mu kibuga Kampala olunaku lwa leero.")
	    btn = gr.Button("Generate", variant="primary")
	    # tts_luganda returns a file path for the WAV, hence type="filepath".
	    out_wav = gr.Audio(label="WAV (22.05 kHz)", type="filepath")
	    # Receives the MP3 path, or None when no MP3 was produced.
	    out_mp3 = gr.File(label="Download MP3", interactive=False)
	    status = gr.Markdown("Ready.")

	    # The three outputs line up with tts_luganda's (wav_path, mp3_path, status).
	    btn.click(fn=tts_luganda, inputs=text, outputs=[out_wav, out_mp3, status])

	# Just enable queue with defaults (no unsupported args)
	demo.queue().launch()