import gradio as gr
import torch
import torchaudio
import os
import numpy as np
from liquid_audio import LFM2AudioModel, LFM2AudioProcessor
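
# NOTE (assumption): the Space needs a requirements.txt providing gradio, torch,
# torchaudio, and the package that exposes LFM2AudioModel / LFM2AudioProcessor
# (the import above assumes it is installed as liquid_audio).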
# --- CONFIGURATION ---
MODEL_ID = "LiquidAI/LFM2.5-Audio-1.5B"
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"⏳ Loading model: {MODEL_ID} on {device}...")
try:
    processor = LFM2AudioProcessor.from_pretrained(MODEL_ID)
    model = LFM2AudioModel.from_pretrained(MODEL_ID).to(device)
    print("✅ Model loaded successfully!")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    model = None
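
# Optional (assumption, left commented out): if the default fp32 load does not
# fit the Space's GPU, a half-precision load may help; verify that
# LFM2AudioModel accepts a torch_dtype argument before enabling this.
# model = LFM2AudioModel.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16).to(device)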
def process_audio(input_audio_path):
    if model is None:
        raise gr.Error("Model not loaded.")
    if input_audio_path is None:
        raise gr.Error("Please record or upload some audio first.")
    try:
        # 1. Load and process the audio file
        # Gradio passes audio as a filepath string
        waveform, sample_rate = torchaudio.load(input_audio_path)
        # Resample if necessary (the model typically expects 16 kHz)
        if sample_rate != 16000:
            resampler = torchaudio.transforms.Resample(sample_rate, 16000)
            waveform = resampler(waveform)
            sample_rate = 16000
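        # Downmix to mono (assumption: like most speech models, the model
        # expects a single-channel waveform; stereo uploads would otherwise
        # pass two channels through)
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)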
        # 2. Prepare inputs for the model
        inputs = processor(
            audio=waveform,
            sampling_rate=sample_rate,
            return_tensors="pt",
        ).to(device)
        # 3. Generate response (interleaved audio-to-audio)
        # The generation parameters may need tuning for the specific model version
        with torch.no_grad():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=256,  # adjust response length as needed
                do_sample=True,
                temperature=0.7,
            )
        # 4. Decode the output to audio
        # The processor handles converting tokens back to a waveform
        output_waveform = processor.batch_decode(generated_ids)[0]
        # torchaudio.save expects a 2-D (channels, frames) CPU tensor
        output_tensor = torch.as_tensor(output_waveform).cpu()
        if output_tensor.dim() == 1:
            output_tensor = output_tensor.unsqueeze(0)
        # Save to a temporary file so Gradio can serve it back
        output_path = "output_response.wav"
        torchaudio.save(output_path, output_tensor, 16000)
        return output_path
    except Exception as e:
        raise gr.Error(f"Error during inference: {e}")
# --- GRADIO INTERFACE ---
with gr.Blocks(title="Liquid LFM2.5 Audio") as demo:
    gr.Markdown("# 💧 LiquidAI LFM2.5 Audio (Speech-to-Speech)")
    with gr.Row():
        input_audio = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Input Speech")
        output_audio = gr.Audio(type="filepath", label="Response", autoplay=True)
    submit_btn = gr.Button("Generate Response", variant="primary")
    submit_btn.click(
        fn=process_audio,
        inputs=[input_audio],
        outputs=[output_audio],
    )
if __name__ == "__main__":
    demo.launch()