Xenobd's picture
Create app.py
bfc658b verified
raw
history blame
2.73 kB
import os
import gradio as gr
import numpy as np
import soundfile as sf
import tempfile
# ------------------------------------
# REPO AUTO-CLONE SECTION
# ------------------------------------
# Hugging Face repository that is cloned at startup when TARGET_DIR does not exist yet.
REPO_URL = "https://huggingface.co/Supertone/supertonic"
# Local directory the repository is cloned into; its presence alone decides whether the clone is skipped.
TARGET_DIR = "supertonic" # folder name after clone
def run_cmd(cmd):
    """Echo a shell command line, execute it, and hand back its status.

    Args:
        cmd: Command line passed unchanged to ``os.system``.

    Returns:
        Whatever ``os.system`` returns for the command (0 on success).
    """
    banner = "[CMD] {}".format(cmd)
    print(banner)
    status = os.system(cmd)
    return status
# Startup step: make sure a local checkout of the Supertonic repo exists.
# The check is purely "does TARGET_DIR exist"; the contents are not validated.
print("=== Checking Supertonic repo ===")
if not os.path.exists(TARGET_DIR):
    print("[+] Cloning repo (LFS pointers only)...")
    # A failing `git lfs install` is reported but does not abort startup;
    # the clone below is still attempted.
    lfs_status = run_cmd("git lfs install")
    if lfs_status != 0:
        print(f"[!] 'git lfs install' exited with status {lfs_status}; continuing.")
    # GIT_LFS_SKIP_SMUDGE=1 keeps the clone light: LFS-tracked files arrive as
    # pointer files instead of being downloaded.
    clone_status = run_cmd(f"GIT_LFS_SKIP_SMUDGE=1 git clone {REPO_URL} {TARGET_DIR}")
    if clone_status != 0:
        # Surface the failure instead of ignoring it: a partial checkout would
        # otherwise be reported as "already exists" on the next start.
        print(
            f"[!] git clone exited with status {clone_status}; "
            f"{TARGET_DIR!r} may be missing or incomplete."
        )
else:
    print("[✓] Repo already exists. Skipping clone.")
# ------------------------------------
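# IMPORT TTS HELPERS FROM tts_model
# ------------------------------------
# tts_model.py must be importable: place it next to this file or elsewhere on sys.path.
# The cloned repo folder is not added to sys.path here, so a copy that only lives inside it will not be found.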
from tts_model import (
load_text_to_speech,
load_voice_style,
sanitize_filename,
)
# ------------------------------------
# LOAD MODEL
# ------------------------------------
# Directory handed to load_text_to_speech; the path is relative to the working directory.
# NOTE(review): this is not inside TARGET_DIR, and the clone step fetches LFS pointers only —
# confirm the real ONNX files are actually present here.
ONNX_DIR = "./onnx" # change if needed
# Voice-style file(s) passed to load_voice_style.
STYLE_FILES = ["./styles/default.json"]
# Forwarded to the model as `total_step` on every synthesis call.
# NOTE(review): presumably the number of generation steps — confirm against tts_model.
TOTAL_STEP = 30
print("Loading TTS model...")
# Both objects are created once at import time and reused by every run_tts call.
tts = load_text_to_speech(ONNX_DIR)
style = load_voice_style(STYLE_FILES)
# ------------------------------------
# TTS FUNCTION
# ------------------------------------
def run_tts(text, speed):
    """Synthesize speech for *text* and write it to a temporary WAV file.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected without calling the model.
        speed: Speed factor; converted with ``float()`` before being passed
            to the model.

    Returns:
        A ``(path, message)`` tuple. On success ``path`` is the WAV file that
        was written and ``message`` reports the generated duration. For
        empty input ``path`` is ``None`` and ``message`` explains why.
    """
    # Guard against None as well as blank strings so the callback never
    # raises AttributeError on a missing value.
    if text is None or not text.strip():
        return None, "Text cannot be empty."

    wav, dur = tts(
        text=text,
        style=style,
        total_step=TOTAL_STEP,
        speed=float(speed),
        silence_duration=0.3,
    )
    # Drop singleton dimensions (original author's note: [1, N] -> [N]).
    wav = wav.squeeze()

    # mkstemp creates the file atomically. The previous tempfile.mktemp only
    # returned a name, which is deprecated and open to a race between naming
    # and writing.
    fd, tmp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # soundfile reopens the path itself
    try:
        sf.write(tmp_path, wav, tts.sample_rate)
    except Exception:
        # Do not leave an empty temp file behind when the write fails.
        os.unlink(tmp_path)
        raise
    return tmp_path, f"Generated duration: {float(dur):.2f}s"
# ------------------------------------
# GRADIO UI
# ------------------------------------
def ui():
    """Assemble the Gradio Blocks page for the TTS demo and return it.

    The page holds a text input, a speed slider, a button, an audio output
    and a read-only info box. Clicking the button calls ``run_tts`` with the
    text and speed values and routes its two results to the audio and info
    components.
    """
    textbox_opts = {
        "label": "Input Text",
        "placeholder": "Type something...",
        "lines": 4,
    }
    slider_opts = {
        "label": "Speed",
        "minimum": 0.5,
        "maximum": 2.0,
        "value": 1.05,
        "step": 0.01,
    }

    with gr.Blocks(title="ONNX TTS") as page:
        gr.Markdown("## 🎀 ONNX Text-To-Speech Demo")

        # Components are created in the same order as before.
        prompt_box = gr.Textbox(**textbox_opts)
        rate_slider = gr.Slider(**slider_opts)
        synth_button = gr.Button("Generate Speech πŸ”Š")
        player = gr.Audio(label="Output")
        status_box = gr.Textbox(label="Info", interactive=False)

        synth_button.click(
            fn=run_tts,
            inputs=[prompt_box, rate_slider],
            outputs=[player, status_box],
        )

    return page
# Build the interface at import time so `app` is available as a module-level object.
app = ui()
# Start the Gradio server only when this file is executed directly as a script.
if __name__ == "__main__":
    app.launch()