import os
import subprocess
import urllib.request
import uuid
from pathlib import Path

import gradio as gr
from PIL import Image
from pydub import AudioSegment

# ──────────────────────────────────────────────
# 1. Download model checkpoint once
# ──────────────────────────────────────────────
MODEL_PATH = Path("wav2lip_gan.pth")
MODEL_URL = (
    "https://huggingface.co/spaces/fffiloni/wav2lip/resolve/main/wav2lip_gan.pth"
)  # public mirror

if not MODEL_PATH.exists():
    # Stdlib download instead of `os.system("wget ...")`: portable (wget may
    # be absent), no shell involved, and a failed download raises instead of
    # silently leaving a missing/empty checkpoint behind.
    urllib.request.urlretrieve(MODEL_URL, MODEL_PATH)


# ──────────────────────────────────────────────
# 2. Helper: resize image + convert audio → 16 kHz mono WAV
# ──────────────────────────────────────────────
def preprocess(image, audio_file):
    """Prepare inputs for Wav2Lip inference.

    Resizes *image* (a PIL ``Image``) to 256 px height keeping the aspect
    ratio, re-encodes *audio_file* as a 16 kHz mono WAV, and returns the
    triple ``(img_path, wav_path, out_path)`` of unique working-file names.

    Raises:
        ValueError: if either input is missing.
    """
    if image is None or audio_file is None:
        raise ValueError("Both an image and an audio file are required.")

    uid = uuid.uuid4().hex
    img_path = f"{uid}.jpg"
    wav_path = f"{uid}.wav"
    out_path = f"{uid}_result.mp4"

    # Resize to 256 px height (keeps aspect ratio).  max(1, ...) guards
    # against a zero-width result for extremely wide, short images.
    new_width = max(1, round(image.width * 256 / image.height))
    image = image.resize((new_width, 256), Image.Resampling.LANCZOS)
    # JPEG has no alpha channel: RGBA / palette uploads (common for PNGs)
    # would make .save() raise, so normalize to RGB first.
    if image.mode != "RGB":
        image = image.convert("RGB")
    image.save(img_path)

    # Wav2Lip expects 16 kHz mono audio.
    seg = AudioSegment.from_file(audio_file)
    seg = seg.set_frame_rate(16_000).set_channels(1)
    seg.export(wav_path, format="wav")

    return img_path, wav_path, out_path


# ──────────────────────────────────────────────
# 3. Main inference wrapper
# ──────────────────────────────────────────────
def generate(image, audio):
    """Run Wav2Lip on one image + one audio clip.

    Returns the path of the generated MP4 on success, or an error string
    on failure (keeping the original's string-error convention for the UI).
    """
    try:
        img, wav, out_vid = preprocess(image, audio)
    except Exception as e:
        return f"❌ {e}"

    # NOTE: the original passed check=True, which raises CalledProcessError
    # and crashes the request whenever inference fails — inconsistent with
    # the error-string handling above.  Inspect the return code instead.
    proc = subprocess.run(
        [
            "python", "inference.py",
            "--checkpoint_path", str(MODEL_PATH),
            "--face", img,
            "--audio", wav,
            "--outfile", out_vid,
        ]
    )
    if proc.returncode != 0 or not Path(out_vid).exists():
        return "❌ Generation failed."
    return out_vid


# ──────────────────────────────────────────────
# 4. Gradio UI
# ──────────────────────────────────────────────
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Image"),
        gr.Audio(type="filepath", label="Audio (any format)"),
    ],
    outputs=gr.Video(label="Talking-head MP4"),
    title="🗣️ Wav2Lip CPU Demo",
    description=(
        "Upload a single face image and an audio clip to create a "
        "lip-synced video (runs on free CPU tier)."
    ),
    allow_flagging="never",
    # live=True was removed: it re-ran the (minutes-long on CPU) inference on
    # every input change; with the submit button the user triggers it once,
    # deliberately.
)

if __name__ == "__main__":
    demo.launch()