EgoisticCoderX committed on
Commit
23c332d
·
verified ·
1 Parent(s): fa81513

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torchaudio
4
+ import os
5
+ import numpy as np
6
+ from liquid_audio import LFM2AudioModel, LFM2AudioProcessor
7
+
8
# --- CONFIGURATION ---
MODEL_ID = "LiquidAI/LFM2.5-Audio-1.5B"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Bind both handles before attempting the load so they always exist at module
# level; `process_audio` uses `model is None` as its "not loaded" signal.
processor = None
model = None

print(f"⏳ Loading model: {MODEL_ID} on {device}...")
try:
    processor = LFM2AudioProcessor.from_pretrained(MODEL_ID)
    # The app only runs inference, so put the module in eval mode right away
    # (disables training-only behaviour such as dropout).
    model = LFM2AudioModel.from_pretrained(MODEL_ID).to(device).eval()
    print("✅ Model loaded successfully!")
except Exception as e:
    # Deliberate best effort: keep the UI importable and report the failure
    # per request instead of crashing at start-up.
    print(f"❌ Error loading model: {e}")
    # Reset both so a processor that loaded before the model failed is not
    # left behind in a half-initialised state.
    processor = None
    model = None
20
+
21
def process_audio(input_audio_path):
    """Generate a spoken response for one recorded or uploaded audio clip.

    Args:
        input_audio_path: Filesystem path of the input clip, as passed by the
            ``gr.Audio(type="filepath")`` input component. ``None`` (or an
            empty string) when the user submitted without providing audio.

    Returns:
        Path of a WAV file holding the generated response. Exactly one value
        is returned because the click handler is wired to a single output
        component.

    Raises:
        gr.Error: If the model is not loaded, no audio was supplied, or any
            step of loading, inference, or saving fails. Gradio shows the
            message to the user.
    """
    # Local import: this function is the only user of the module.
    import tempfile

    target_rate = 16000

    # Validate outside the try block so these errors are not re-wrapped as
    # "Error during inference".
    if model is None:
        raise gr.Error("Error: Model not loaded.")
    if not input_audio_path:
        raise gr.Error("Error: No input audio provided.")

    try:
        # 1. Load the audio file (Gradio passes audio as a filepath string).
        waveform, sample_rate = torchaudio.load(input_audio_path)

        # Resample if necessary (model typically expects 16 kHz).
        if sample_rate != target_rate:
            resampler = torchaudio.transforms.Resample(sample_rate, target_rate)
            waveform = resampler(waveform)
            sample_rate = target_rate

        # 2. Prepare inputs for the model.
        # NOTE(review): the processor(...) / model.generate(...) /
        # batch_decode(...) calls below assume a transformers-style interface.
        # Confirm these entry points against the installed liquid_audio
        # version before relying on this path.
        inputs = processor(
            audio=waveform,
            sampling_rate=sample_rate,
            return_tensors="pt",
        ).to(device)

        # 3. Generate the response (interleaved audio-to-audio).
        with torch.no_grad():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=256,  # Adjust length as needed
                do_sample=True,
                temperature=0.7,
            )

        # 4. Decode the output tokens back to a waveform.
        output_waveform = processor.batch_decode(generated_ids)[0]

        # torchaudio.save needs a 2-D (channels, frames) CPU tensor.
        # as_tensor avoids a redundant copy when the decoder already returns
        # a tensor.
        audio = torch.as_tensor(output_waveform).detach().cpu()
        if audio.dim() == 1:
            audio = audio.unsqueeze(0)

        # Write to a unique temporary file so concurrent requests cannot
        # overwrite each other's response. The file is left on disk for
        # Gradio to serve.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        # NOTE(review): the output sample rate is assumed to equal the input
        # rate; confirm the decoder's actual output rate.
        torchaudio.save(output_path, audio, target_rate)

        return output_path

    except Exception as e:
        # Top-level request boundary: surface any failure in the UI instead
        # of returning a value whose shape does not match the outputs list.
        raise gr.Error(f"Error during inference: {str(e)}") from e
65
+
66
# --- GRADIO INTERFACE ---
# Layout: a title, one row holding the input recorder/uploader next to the
# response player, and a button that runs `process_audio` on the input clip.
with gr.Blocks(title="Liquid LFM2.5 Audio") as demo:
    gr.Markdown("# 💧 LiquidAI LFM2.5 Audio (Speech-to-Speech)")

    with gr.Row():
        # The input is handed to the callback as a file path.
        input_audio = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Input Speech",
        )
        # The response starts playing as soon as it is returned.
        output_audio = gr.Audio(
            type="filepath",
            label="Response",
            autoplay=True,
        )

    submit_btn = gr.Button("Generate Response", variant="primary")

    # One input component in, one output component out.
    submit_btn.click(fn=process_audio, inputs=[input_audio], outputs=[output_audio])

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()