import os
import uuid
import subprocess
from pathlib import Path
import gradio as gr
from PIL import Image
from pydub import AudioSegment
# ──────────────────────────────────────────────
# 1. Download model checkpoint once
# ──────────────────────────────────────────────
MODEL_PATH = Path("wav2lip_gan.pth")
MODEL_URL = (
    "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"
)  # public mirror
if not MODEL_PATH.exists():
    os.system(f"wget -q {MODEL_URL} -O {MODEL_PATH}")
# ──────────────────────────────────────────────
# 2. Helper: resize image + convert audio → 16 kHz mono WAV
# ──────────────────────────────────────────────
def preprocess(image, audio_file):
    if image is None or audio_file is None:
        raise ValueError("Both an image and an audio file are required.")
    uid = uuid.uuid4().hex
    img_path = f"{uid}.jpg"
    wav_path = f"{uid}.wav"
    out_path = f"{uid}_result.mp4"
    # resize image to 256 px height (keeps aspect ratio) and drop any alpha
    # channel so it can be saved as JPEG
    image = image.convert("RGB")
    image = image.resize((int(image.width * 256 / image.height), 256), Image.Resampling.LANCZOS)
    image.save(img_path)
    # convert audio to 16 kHz mono WAV
    seg = AudioSegment.from_file(audio_file)
    seg = seg.set_frame_rate(16_000).set_channels(1)
    seg.export(wav_path, format="wav")
    return img_path, wav_path, out_path
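# Note: AudioSegment.from_file decodes non-WAV uploads through ffmpeg, so the
# Space needs an ffmpeg binary on PATH (e.g. listed in packages.txt). A quick
# sanity check, as a sketch:
#
#   import shutil
#   assert shutil.which("ffmpeg"), "ffmpeg not found on PATH"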
# ──────────────────────────────────────────────
# 3. Main inference wrapper
# ──────────────────────────────────────────────
def generate(image, audio):
    try:
        img, wav, out_vid = preprocess(image, audio)
    except Exception as e:
        # surface validation problems in the UI instead of returning a plain
        # string to the Video output
        raise gr.Error(str(e))
    # inference.py comes from the Wav2Lip repo and must sit next to this script
    subprocess.run(
        [
            "python", "inference.py",
            "--checkpoint_path", str(MODEL_PATH),
            "--face", img,
            "--audio", wav,
            "--outfile", out_vid,
        ],
        check=True,
    )
    if not Path(out_vid).exists():
        raise gr.Error("Generation failed.")
    return out_vid
# ──────────────────────────────────────────────
# 4. Gradio UI
# ──────────────────────────────────────────────
demo = gr.Interface(
    fn=generate,
    inputs=[gr.Image(type="pil", label="Image"),
            gr.Audio(type="filepath", label="Audio (any format)")],
    outputs=gr.Video(label="Talking-head MP4"),
    title="🗣️ Wav2Lip CPU Demo",
    description="Upload a single face image and an audio clip to create a lip-synced video (runs on the free CPU tier).",
    allow_flagging="never",
)
if __name__ == "__main__":
    demo.launch()
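# Local smoke test without the UI (a sketch; assumes a face image and a short
# audio clip exist in the working directory, file names are placeholders):
#
#   from PIL import Image
#   print(generate(Image.open("face.jpg"), "speech.wav"))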