"""Gradio demo: Vietnamese text-to-speech running on CPU.

Loads a tokenizer and a speech model once at import time, then exposes a
small web UI where the user types text and receives a generated WAV file.
"""

import tempfile
from typing import Optional

import gradio as gr
import numpy as np
import soundfile as sf
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoTokenizer

# Name of the TTS model on the Hugging Face Hub.
# NOTE(review): this looks like an F5-TTS checkpoint; confirm that it is
# actually loadable through ``AutoModelForSpeechSeq2Seq`` / ``AutoTokenizer``.
MODEL_NAME = "hynt/F5-TTS-Vietnamese-100h"

# Sample rate (Hz) written into the output WAV header.
# NOTE(review): must match the rate the model generates at -- TODO confirm.
SAMPLE_RATE = 22050

# Load model & tokenizer once so every request reuses them.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,  # "CPU Basic" hardware -> use float32
)

# Make sure inference runs on the CPU.
device = torch.device("cpu")
model = model.to(device)


def tts_generate(text: Optional[str]) -> Optional[str]:
    """Synthesize speech for *text* and return the path of a WAV file.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input
            produces no audio.

    Returns:
        Path to a newly created temporary ``.wav`` file, or ``None`` when
        there is nothing to synthesize. The file is not deleted here; the
        caller (the Gradio audio component) reads it by path.
    """
    # Guard against None as well as blank strings so a programmatic/API call
    # without text does not crash on ``.strip()``.
    if not text or not text.strip():
        return None

    inputs = tokenizer(text, return_tensors="pt").to(device)

    # Generate audio; gradients are never needed for inference.
    with torch.no_grad():
        audio = model.generate(**inputs)

    audio = audio.cpu().numpy().squeeze()

    # Create the temp file atomically (``tempfile.mktemp`` is deprecated and
    # race-prone). Close the handle before writing so ``soundfile`` can
    # reopen the path on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        tmp_path = tmp_file.name
    sf.write(tmp_path, audio, SAMPLE_RATE)

    return tmp_path


### Gradio UI ###
with gr.Blocks(title="TTS Vietnamese Free") as demo:
    gr.Markdown("# 🇻🇳 Text-to-Speech Vietnamese (Free Version)\nNhập văn bản tiếng Việt và nhấn **Sinh giọng nói**:")

    text_in = gr.Textbox(lines=5, label="Văn bản")
    audio_out = gr.Audio(label="Kết quả", type="filepath")

    btn = gr.Button("🎤 Sinh giọng nói")
    btn.click(tts_generate, inputs=text_in, outputs=audio_out)

demo.launch()