Spaces:

Xenobd
/

Supersonic-TTS-Cpu

Running

App Files Files Community

Supersonic-TTS-Cpu / app.py

Xenobd

Create app.py

bfc658b verified 25 days ago

raw

history blame

2.73 kB

	import os
	import gradio as gr
	import numpy as np
	import soundfile as sf
	import tempfile

	# ------------------------------------
	# UPSTREAM REPO CLONE SETTINGS
	# ------------------------------------
	# Remote Git repository that is cloned at startup when it is missing locally.
	REPO_URL = "https://huggingface.co/Supertone/supertonic"
	# Local directory name the repository is cloned into.
	TARGET_DIR = "supertonic"

	def run_cmd(cmd):
	    """Echo a shell command line, then run it through ``os.system``.

	    Args:
	        cmd: Command line string handed to the system shell.

	    Returns:
	        Whatever ``os.system(cmd)`` returns; 0 indicates success.
	    """
	    # Flush the echo before spawning the child: when stdout is a pipe or a
	    # log file Python buffers it, and the child's own output (written
	    # directly to the same descriptor) would otherwise appear first.
	    print(f"[CMD] {cmd}", flush=True)
	    return os.system(cmd)

	print("=== Checking Supertonic repo ===")
	if os.path.exists(TARGET_DIR):
	    print("[✓] Repo already exists. Skipping clone.")
	else:
	    print("[+] Cloning repo (LFS pointers only)...")
	    # GIT_LFS_SKIP_SMUDGE=1 tells git-lfs not to download the large files
	    # during checkout, so only the small LFS pointer files are written.
	    clone_steps = (
	        "git lfs install",
	        f"GIT_LFS_SKIP_SMUDGE=1 git clone {REPO_URL} {TARGET_DIR}",
	    )
	    for step in clone_steps:
	        status = run_cmd(step)
	        # Report failures instead of discarding the exit status. Startup is
	        # deliberately not aborted: every step is still attempted, exactly
	        # as before, and the rest of the script continues afterwards.
	        if status != 0:
	            print(f"[!] Command failed with status {status}: {step}")


	# ------------------------------------
	# IMPORT TTS MODULE FROM YOUR CLONED PATH
	# ------------------------------------
	# tts_model.py must be importable from here, e.g. placed in the same folder
	# as this script. Note that this file does not add TARGET_DIR to sys.path.
	from tts_model import (
	load_text_to_speech,
	load_voice_style,
	sanitize_filename,
	)

	# ------------------------------------
	# MODEL AND VOICE STYLE SETUP
	# ------------------------------------
	# Relative paths; change them if the files live elsewhere.
	ONNX_DIR = "./onnx"
	STYLE_FILES = [
	    "./styles/default.json",
	]
	# Forwarded to the model as `total_step` on every synthesis call.
	TOTAL_STEP = 30

	# Both objects are created once at import time and reused by run_tts.
	print("Loading TTS model...")
	tts = load_text_to_speech(ONNX_DIR)
	style = load_voice_style(STYLE_FILES)


	# ------------------------------------
	# TTS FUNCTION
	# ------------------------------------
	def run_tts(text, speed):
	    """Synthesize speech for ``text`` and write it to a temporary WAV file.

	    Args:
	        text: Text to speak. ``None``, empty, or whitespace-only input is
	            rejected without calling the model.
	        speed: Speed value; converted with ``float()`` before being passed
	            to the model.

	    Returns:
	        A ``(path, message)`` tuple. On success ``path`` is the WAV file that
	        was written and ``message`` reports the generated duration. When
	        there is no text, ``(None, "Text cannot be empty.")`` is returned.
	    """
	    # Check for None/empty first so that .strip() is never called on None.
	    if not text or not text.strip():
	        return None, "Text cannot be empty."

	    wav, dur = tts(
	        text=text,
	        style=style,
	        total_step=TOTAL_STEP,
	        speed=float(speed),
	        silence_duration=0.3,
	    )

	    # Drop singleton dimensions, e.g. [1, N] -> [N].
	    wav = wav.squeeze()

	    # tempfile.mktemp() is deprecated: it only returns a name, leaving a
	    # window in which another process can create that path first. Create the
	    # file securely instead; delete=False keeps it on disk so the returned
	    # path is still valid after this function returns.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
	        tmp_path = tmp_file.name
	    sf.write(tmp_path, wav, tts.sample_rate)

	    return tmp_path, f"Generated duration: {float(dur):.2f}s"


	# ------------------------------------
	# GRADIO UI
	# ------------------------------------
	def ui():
	    """Build the Gradio Blocks interface for the TTS demo.

	    Returns:
	        The ``gr.Blocks`` instance, with the generate button wired to
	        ``run_tts``.
	    """
	    with gr.Blocks(title="ONNX TTS") as demo:
	        gr.Markdown("## 🎤 ONNX Text-To-Speech Demo")

	        # Input controls.
	        text_input = gr.Textbox(label="Input Text", placeholder="Type something...", lines=4)
	        speed_input = gr.Slider(label="Speed", minimum=0.5, maximum=2.0, value=1.05, step=0.01)

	        trigger = gr.Button("Generate Speech 🔊")

	        # Output widgets.
	        audio_output = gr.Audio(label="Output")
	        status_output = gr.Textbox(label="Info", interactive=False)

	        # run_tts returns (file path, message), matching the two outputs.
	        trigger.click(
	            fn=run_tts,
	            inputs=[text_input, speed_input],
	            outputs=[audio_output, status_output],
	        )

	    return demo


	# Build the interface at import time so `app` exists as a module-level name.
	app = ui()

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    app.launch()