import os
import uuid
import subprocess
from pathlib import Path

import gradio as gr
from PIL import Image
from pydub import AudioSegment
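# Spaces setup note: pydub shells out to ffmpeg for decoding, so a Space needs
# it installed as a system package. A sketch of the dependency files, covering
# only this file's imports (inference.py brings its own requirements):
#
#     # packages.txt
#     ffmpeg
#
#     # requirements.txt
#     gradio
#     Pillow
#     pydub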
# ──────────────────────────────────────────────
# 1. Download model checkpoint once
# ──────────────────────────────────────────────
MODEL_PATH = Path("wav2lip_gan.pth")
MODEL_URL = (
    "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"
)  # public mirror
if not MODEL_PATH.exists():
    os.system(f"wget -q {MODEL_URL} -O {MODEL_PATH}")
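# An alternative download path (a sketch, not wired in here): huggingface_hub
# caches and retries, and avoids shelling out to wget.
#
#     from huggingface_hub import hf_hub_download
#     checkpoint = hf_hub_download(
#         repo_id="fffiloni/wav2lip",
#         filename="wav2lip_gan.pth",
#         repo_type="space",
#     )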
# ──────────────────────────────────────────────
# 2. Helper: resize image + convert audio → 16 kHz mono WAV
# ──────────────────────────────────────────────
def preprocess(image, audio_file):
    if image is None or audio_file is None:
        raise ValueError("Both an image and an audio file are required.")
    uid = uuid.uuid4().hex
    img_path = f"{uid}.jpg"
    wav_path = f"{uid}.wav"
    out_path = f"{uid}_result.mp4"
    # resize to 256 px height (keeps aspect ratio); round the width down to an
    # even number, since H.264/yuv420p output rejects odd frame dimensions
    width = int(image.width * 256 / image.height) // 2 * 2
    image = image.resize((width, 256), Image.Resampling.LANCZOS)
    # JPEG cannot store an alpha channel, so force RGB before saving
    image.convert("RGB").save(img_path)
    # convert audio to 16 kHz mono WAV
    seg = AudioSegment.from_file(audio_file)
    seg = seg.set_frame_rate(16_000).set_channels(1)
    seg.export(wav_path, format="wav")
    return img_path, wav_path, out_path
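# Note: the per-request files created here are never deleted, so they
# accumulate on a long-running Space. A minimal cleanup sketch (the call site
# shown is hypothetical; it would run after the video has been served):
#
#     def cleanup(*paths):
#         for p in paths:
#             Path(p).unlink(missing_ok=True)
#
#     cleanup(img, wav)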
# ──────────────────────────────────────────────
# 3. Main inference wrapper
# ──────────────────────────────────────────────
def generate(image, audio):
    try:
        img, wav, out_vid = preprocess(image, audio)
    except Exception as e:
        # surface the problem in the UI; a gr.Video output cannot
        # display a plain error string
        raise gr.Error(str(e))
    try:
        subprocess.run(
            [
                "python", "inference.py",
                "--checkpoint_path", str(MODEL_PATH),
                "--face", img,
                "--audio", wav,
                "--outfile", out_vid,
            ],
            check=True,
        )
    except subprocess.CalledProcessError:
        raise gr.Error("❌ Generation failed (inference.py exited with an error).")
    if not Path(out_vid).exists():
        raise gr.Error("❌ Generation failed.")
    return out_vid
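# When a run fails it helps to see why; subprocess can capture the child's
# stderr instead of leaving it in the container log. A sketch of the same call
# with diagnostics:
#
#     cmd = ["python", "inference.py", "--checkpoint_path", str(MODEL_PATH),
#            "--face", img, "--audio", wav, "--outfile", out_vid]
#     proc = subprocess.run(cmd, capture_output=True, text=True)
#     if proc.returncode != 0:
#         raise gr.Error(f"inference.py failed: {proc.stderr[-500:]}")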
# ──────────────────────────────────────────────
# 4. Gradio UI
# ──────────────────────────────────────────────
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Image"),
        gr.Audio(type="filepath", label="Audio (any format)"),
    ],
    outputs=gr.Video(label="Talking-head MP4"),
    title="🗣️ Wav2Lip CPU Demo",
    description=(
        "Upload a single face image and an audio clip to create a "
        "lip-synced video (runs on the free CPU tier)."
    ),
    allow_flagging="never",
)
if __name__ == "__main__":
    demo.launch()
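# On the free CPU tier a single request can take minutes, so Gradio's request
# queue is worth enabling to keep concurrent users from timing out. A sketch
# (max_size is an arbitrary choice):
#
#     demo.queue(max_size=8).launch()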