harpalsinh8 committed on
Commit
f441ec8
·
verified ·
1 Parent(s): 45f6601

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -24
app.py CHANGED
@@ -1,30 +1,32 @@
1
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
2
  import torchaudio
3
- from torchao_tts import TTSModel
4
 
5
- # Load Kyutai TTS model
6
  model_id = "kyutai/tts-1.6b-en_fr"
7
- model = TTSModel.from_pretrained(model_id)
8
 
9
- # Function to generate speech
10
- def tts_fn(text, language="en"):
11
- # Generate waveform
12
- audio = model.generate(text, language=language)
13
- # Save to temporary wav
14
- torchaudio.save("output.wav", audio, 16000)
15
- return "output.wav"
16
 
17
- # Gradio UI
18
- demo = gr.Interface(
19
- fn=tts_fn,
20
- inputs=[
21
- gr.Textbox(label="Input text", placeholder="Type something to speak..."),
22
- gr.Dropdown(choices=["en", "fr"], value="en", label="Language"),
23
- ],
24
- outputs=gr.Audio(type="filepath", label="Generated Speech"),
25
- title="Kyutai TTS (1.6B EN/FR)",
26
- description="Test the kyutai/tts-1.6b-en_fr model for English/French speech synthesis."
27
- )
28
 
29
- if __name__ == "__main__":
30
- demo.launch()
 
1
+ # ======================
2
+ # Setup
3
+ # ======================
4
+ !git clone https://github.com/kyutai-labs/delayed-streams-modeling.git
5
+ %cd delayed-streams-modeling
6
+
7
+ !pip install -q torch torchaudio gradio moshi
8
+
9
+ # ======================
10
+ # Import + Load Model
11
+ # ======================
12
+ import torch
13
  import torchaudio
14
+ from moshi.models import TTSModel
15
 
16
+ # Load Kyutai TTS model (English + French)
17
  model_id = "kyutai/tts-1.6b-en_fr"
18
+ model = TTSModel.from_pretrained(model_id, device="cuda" if torch.cuda.is_available() else "cpu")
19
 
20
+ # ======================
21
+ # Simple test function
22
+ # ======================
23
+ def synthesize(text, lang="en", filename="out.wav"):
24
+ audio = model.generate(text, language=lang)
25
+ torchaudio.save(filename, audio.cpu(), 16000)
26
+ return filename
27
 
28
+ # Example: run once to check
29
+ synthesize("Hello, this is Kyutai TTS running on Kaggle!", "en", "demo.wav")
 
 
 
 
 
 
 
 
 
30
 
31
+ import IPython.display as ipd
32
+ ipd.Audio("demo.wav")