"""Gradio demo that turns typed text into speech with a Kyutai TTS checkpoint."""

import gradio as gr
import torch
from transformers import AutoModelForTextToWaveform, AutoTokenizer

model_id = "kyutai/tts-1.6b-en_fr"

# Sample rate reported to the audio player when the loaded model's config does
# not expose one.
# NOTE(review): confirm 16 kHz matches what this checkpoint actually emits; a
# wrong rate makes playback sound too slow or too fast.
FALLBACK_SAMPLE_RATE = 16000

# Loaded once at import time so every request reuses the same weights.
model = AutoModelForTextToWaveform.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)


def _output_sample_rate():
    """Return the sample rate declared by the loaded model, else the fallback."""
    holders = (model.config, getattr(model, "generation_config", None))
    for holder in holders:
        for attr in ("sampling_rate", "sample_rate"):
            rate = getattr(holder, attr, None)
            if isinstance(rate, int) and rate > 0:
                return rate
    return FALLBACK_SAMPLE_RATE


def tts_fn(text, language="en"):
    """Synthesize speech for ``text`` and return it in the form ``gr.Audio`` plays.

    Args:
        text: The sentence(s) to speak.
        language: Language code forwarded unchanged to ``model.generate``.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``waveform`` is a NumPy
        array with singleton axes removed.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    # Reject blank input up front instead of sending it through the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():  # inference only; no gradients needed
        audio = model.generate(**inputs, language=language)

    # gr.Audio expects (samples,) or (samples, channels); drop any singleton
    # batch/channel axes so a (1, N) result is not misread as N channels.
    waveform = audio.cpu().numpy().squeeze()
    return (_output_sample_rate(), waveform)


demo = gr.Interface(
    fn=tts_fn,
    inputs=[
        gr.Textbox(label="Enter text"),
        gr.Dropdown(["en", "fr"], value="en", label="Language"),
    ],
    outputs=gr.Audio(label="Generated Speech"),
    title="Kyutai TTS Free Demo (CPU)",
    description="⚠️ Running on free CPU → slow, but works!",
)

if __name__ == "__main__":
    demo.launch()