wav2lip_api / app.py
mich123geb's picture
Update app.py
a30d89d verified
raw
history blame
3.24 kB
import os
import uuid
import subprocess
from pathlib import Path
import gradio as gr
from PIL import Image
from pydub import AudioSegment
# ──────────────────────────────────────────────
# 1. Download model checkpoint once
# ──────────────────────────────────────────────
MODEL_PATH = Path("wav2lip_gan.pth")
MODEL_URL = (
    "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"
)  # public mirror
if not MODEL_PATH.exists():
    # Argument-list subprocess call instead of os.system(f"wget …"):
    # no shell string interpolation, and check=True makes a failed
    # download raise instead of silently leaving a missing/empty file.
    subprocess.run(["wget", "-q", MODEL_URL, "-O", str(MODEL_PATH)], check=True)
# ──────────────────────────────────────────────
# 2. Helper: resize image + convert audio → 16 kHz mono WAV
# ──────────────────────────────────────────────
def preprocess(image, audio_file):
    """Normalise the inputs for Wav2Lip and pick unique output paths.

    Parameters
    ----------
    image : PIL.Image.Image
        Face image in any mode/size.
    audio_file : str
        Path to an audio file in any format pydub/ffmpeg understands.

    Returns
    -------
    tuple[str, str, str]
        (resized JPEG path, 16 kHz mono WAV path, output MP4 path —
        the MP4 is not created here, only named).

    Raises
    ------
    ValueError
        If either input is missing.
    """
    if image is None or audio_file is None:
        raise ValueError("Both an image and an audio file are required.")
    uid = uuid.uuid4().hex
    img_path = f"{uid}.jpg"
    wav_path = f"{uid}.wav"
    out_path = f"{uid}_result.mp4"
    # JPEG cannot store an alpha channel: PNG uploads often arrive as
    # RGBA/P mode, which would make Image.save(*.jpg) raise. Force RGB.
    if image.mode != "RGB":
        image = image.convert("RGB")
    # Resize to 256 px height, keeping aspect ratio; max(1, …) guards
    # against a zero width for pathologically wide inputs.
    new_width = max(1, int(image.width * 256 / image.height))
    image = image.resize((new_width, 256), Image.Resampling.LANCZOS)
    image.save(img_path)
    # Wav2Lip expects 16 kHz mono audio.
    seg = AudioSegment.from_file(audio_file)
    seg = seg.set_frame_rate(16_000).set_channels(1)
    seg.export(wav_path, format="wav")
    return img_path, wav_path, out_path
# ──────────────────────────────────────────────
# 3. Main inference wrapper
# ──────────────────────────────────────────────
def generate(image, audio):
    """Run Wav2Lip inference and return the output video path.

    Returns the generated MP4 path on success, or a human-readable
    "❌ …" error string on failure (matching the style used for
    preprocessing errors).
    """
    try:
        img, wav, out_vid = preprocess(image, audio)
    except Exception as e:
        return f"❌ {e}"
    try:
        subprocess.run(
            [
                "python", "inference.py",
                "--checkpoint_path", str(MODEL_PATH),
                "--face", img,
                "--audio", wav,
                "--outfile", out_vid,
            ],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # check=True previously let a non-zero exit propagate and crash
        # the request; report it the same way as preprocessing errors.
        return f"❌ Inference failed (exit code {e.returncode})."
    finally:
        # The intermediate image/WAV are no longer needed either way;
        # without this they accumulated in the working directory.
        for tmp in (img, wav):
            Path(tmp).unlink(missing_ok=True)
    return out_vid if Path(out_vid).exists() else "❌ Generation failed."
# ──────────────────────────────────────────────
# 4. Gradio UI
# ──────────────────────────────────────────────
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Image"),
        gr.Audio(type="filepath", label="Audio (any format)"),
    ],
    outputs=gr.Video(label="Talking-head MP4"),
    title="🗣️ Wav2Lip CPU Demo",
    description=(
        "Upload a single face image and an audio clip to create a "
        "lip-synced video (runs on free CPU tier)."
    ),
    allow_flagging="never",
    # NOTE: live=True was removed — it re-ran the (minutes-long on CPU)
    # inference on every input change; generation now runs only when the
    # user presses Submit.
)
if __name__ == "__main__":
    demo.launch()