import os, sys
import tempfile
import gradio as gr
from app_tts import infer_tts
from src.gradio_demo import SadTalker
import datetime
import shutil
import re

# from src.utils.text2speech import TTSTalker
from huggingface_hub import snapshot_download
import glob


def get_source_image(image):
    """Return ``image`` exactly as received (identity pass-through)."""
    passthrough = image
    return passthrough


# Detect whether a `webui` module is importable and record the result in the
# module-level flag `in_webui`.
try:
    import webui  # noqa: F401  # in webui

    in_webui = True
except Exception:
    # Any import-time failure is treated as "not inside webui". Catching
    # Exception (instead of a bare `except:`) keeps this best-effort while
    # still letting KeyboardInterrupt and SystemExit propagate.
    in_webui = False


def toggle_audio_file(choice):
    """Return a pair of visibility updates that are flipped by ``choice``.

    When ``choice`` equals ``False`` the first update makes its component
    visible and the second hides its component; for any other value the two
    are swapped.
    """
    # Equality (not truthiness) is used on purpose: values such as ``None``
    # do not compare equal to ``False`` and therefore take the second layout.
    first_visible = bool(choice == False)  # noqa: E712
    return gr.update(visible=first_visible), gr.update(visible=not first_visible)


def safe_filename(name: str) -> str:
    """Replace every character outside ``a-z A-Z 0-9 _ - .`` with ``_``.

    Each disallowed character is replaced individually, so the result has the
    same length as ``name``.
    """
    disallowed = re.compile(r"[^a-zA-Z0-9_\-\.]")
    return disallowed.sub("_", name)


def ref_video_fn(path_of_ref_video):
    """Return an update whose value is ``True`` iff a reference video is set.

    ``path_of_ref_video`` counts as "set" whenever it is not ``None``.
    """
    has_reference = path_of_ref_video is not None
    return gr.update(value=has_reference)


def download_model():
    """Fetch the ``vinthony/SadTalker-V002rc`` snapshot into ``./checkpoints``.

    Delegates to ``huggingface_hub.snapshot_download`` and returns nothing.
    """
    snapshot_options = {
        "repo_id": "vinthony/SadTalker-V002rc",
        "local_dir": "./checkpoints",
        "local_dir_use_symlinks": True,
    }
    snapshot_download(**snapshot_options)


def list_videos():
    """Return the ``.mp4`` paths directly inside ``results``, sorted descending.

    The order is reverse lexicographic on the path strings. An empty list is
    returned when nothing matches (including when ``results`` does not exist).
    """
    results_dir = "results"
    # Collect every mp4 file in the results directory. The pattern contains no
    # "**", so only the top level of the directory is matched.
    found = glob.glob(f"{results_dir}/*.mp4", recursive=True)
    # Hand back the list in descending name order.
    found.sort(reverse=True)
    return found


# New: the two buttons are merged into one; the generated audio is used as the input for the video.
import soundfile as sf


def generate_voice_and_video(
    ref_audio,
    ref_text,
    gen_text,
    speed,
    source_image,
    preprocess_type,
    is_still_mode,
    enhancer,
    batch_size,
    size_of_image,
    pose_style,
    facerender,
    exp_weight,
    use_ref_video,
    ref_video,
    ref_info,
    use_idle_mode,
    length_of_audio,
    blink_every,
):
    """Synthesize speech with TTS, then render a talking video from that audio.

    This is a generator. Every yielded value is a 4-tuple of ``gr.update``
    objects, in this order: generated audio, generated video, status text,
    and the choices of the video dropdown.

    Args:
        ref_audio, ref_text, gen_text, speed: Forwarded unchanged to
            ``infer_tts``.
        source_image .. blink_every: Forwarded unchanged, in the same order,
            to ``SadTalker.test`` together with the generated audio path.

    Yields:
        Progress updates while TTS runs (one per non-empty log message), an
        update once the audio file has been written, and a final update once
        the video has been moved into ``results``. If TTS never produces a
        waveform, a single failure update is yielded and the generator stops.
    """

    final_sample_rate, final_wave = None, None
    logs = ""
    # Start: show that audio generation is in progress.
    yield (
        gr.update(value=None, visible=True, interactive=False),
        gr.update(value=None, visible=True, interactive=False),
        gr.update(value="⏳ Đang tạo âm thanh...", visible=True),
        gr.update(choices=list_videos()),
    )

    # 1. Generate the audio with TTS. ``infer_tts`` is iterated as a stream of
    # (output_audio, log_msg) pairs.
    for output_audio, log_msg in infer_tts(ref_audio, ref_text, gen_text, speed):
        if log_msg:  # append to the running log and surface it in the UI
            logs += log_msg + "\n"
            yield (
                gr.update(value=None, visible=True, interactive=False),
                gr.update(value=None, visible=True, interactive=False),
                gr.update(value=logs, visible=True),
                gr.update(choices=list_videos()),
            )
        if output_audio:  # final audio result: (sample_rate, waveform)
            final_sample_rate, final_wave = output_audio
    if final_wave is None:
        # TTS finished without ever producing a waveform.
        yield (
            gr.update(value=None, visible=True),
            gr.update(value=None, visible=True),
            gr.update(value="❌ Tạo audio thất bại", visible=True),
            gr.update(choices=list_videos()),
        )
        return

    # Write the waveform straight to its final location. Going through a
    # NamedTemporaryFile(delete=False) that was never closed or removed leaked
    # one open handle and one stray temp file per call.
    os.makedirs("results", exist_ok=True)
    safe_audio_name = safe_filename(
        f"atalink_audio_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
    )
    final_audio_path = os.path.join("results", safe_audio_name)
    sf.write(final_audio_path, final_wave, final_sample_rate)
    # Audio is done; move on to video generation.
    yield (
        gr.update(value=final_audio_path, visible=True, interactive=True),
        gr.update(value=None, visible=True, interactive=False),
        gr.update(value="⏳ Đang tạo video...", visible=True),
        gr.update(choices=list_videos()),
    )

    # 2. Run SadTalker on the audio that was just generated.
    sad_talker = SadTalker(lazy_load=True)
    video_path = sad_talker.test(
        source_image,
        final_audio_path,
        preprocess_type,
        is_still_mode,
        enhancer,
        batch_size,
        size_of_image,
        pose_style,
        facerender,
        exp_weight,
        use_ref_video,
        ref_video,
        ref_info,
        use_idle_mode,
        length_of_audio,
        blink_every,
    )
    # Move the rendered file into ``results`` under a timestamped name so that
    # ``list_videos`` picks it up.
    safe_name = f"atalink_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
    final_path = os.path.join("results", safe_name)
    shutil.move(video_path, final_path)

    # Both audio and video are finished.
    yield (
        gr.update(value=final_audio_path, visible=True, interactive=True),
        gr.update(value=final_path, visible=True, interactive=True),
        gr.update(value="✅ Hoàn thành!", visible=True),
        gr.update(choices=list_videos(), value=final_path),
    )


def list_files(directory):
    """Return the entries of ``directory`` joined by newlines.

    If listing fails for any reason, the text of the raised exception is
    returned instead of raising.
    """
    try:
        listing = "\n".join(os.listdir(directory))
    except Exception as err:
        return str(err)
    return listing


def sadtalker_demo():
    """Build and return the Gradio ``Blocks`` interface for the demo.

    ``download_model()`` is called first, before any UI is constructed. The
    interface has three tabs: one to create a new video, one to browse videos
    found by ``list_videos()``, and a debug tab that lists a directory.

    Returns:
        The ``gr.Blocks`` instance (not yet queued or launched).
    """
    download_model()
    with gr.Blocks(
        analytics_enabled=False,
    ) as sadtalker_interface:
        gr.Markdown(
            f"""
    ![logo](https://vietnam.atalink.com/favicon.ico)

    ## Atalink TTS_Talker

    Nhập text, upload sample voice và ảnh để tạo video nói chuyện.
        """
        )
        # --- Tab 1: inputs, advanced settings and outputs for a new video ---
        with gr.Tab("Tạo video mới"):
            # TTS inputs: sample voice plus an optional transcript for it.
            with gr.Row(elem_classes="gr-row"):
                ref_audio = gr.Audio(label="🔊 Sample Voice", type="filepath")
                ref_text = gr.Textbox(
                    label="📝 Nội dung tham khảo (tùy chọn)",
                    placeholder="Nhập transcript tiếng Việt cho sample voice nếu có...",
                    lines=2,
                )
            # Text to synthesize and the speaking speed.
            with gr.Row(elem_classes="gr-row"):
                gen_text = gr.Textbox(
                    label="📝 Nội dung cần tạo",
                    placeholder="Nhập nội dung để tạo giọng nói...",
                    lines=3,
                )
                speed = gr.Slider(
                    0.3,
                    2.0,
                    value=1.0,
                    step=0.1,
                    label="⚡ Tốc độ nói",
                    info="Chỉnh tốc độ phát âm",
                )
            with gr.Row(elem_classes="gr-row"):
                source_image = gr.Image(
                    label="Ảnh nguồn", type="filepath", elem_id="img2img_image"
                )
            # Advanced SadTalker options, collapsed by default. Every component
            # created here is passed to generate_voice_and_video below.
            with gr.Accordion(
                "Cài đặt nâng cao SadTalker", open=False, elem_classes="gr-button"
            ):
                with gr.Row(elem_classes="gr-row"):
                    preprocess_type = gr.Radio(
                        ["crop", "resize", "full", "extcrop", "extfull"],
                        value="crop",
                        label="Tiền xử lý ảnh",
                        info="Cách xử lý ảnh đầu vào?",
                    )
                    is_still_mode = gr.Checkbox(
                        label="Chế độ tĩnh (ít chuyển động đầu)"
                    )
                    enhancer = gr.Checkbox(label="Dùng GFPGAN làm đẹp mặt")
                    batch_size = gr.Slider(
                        label="Batch size", step=1, maximum=10, value=1
                    )
                    size_of_image = gr.Radio(
                        [256, 512],
                        value=256,
                        label="Độ phân giải khuôn mặt",
                        info="Dùng model 256/512?",
                    )
                with gr.Row(elem_classes="gr-row"):
                    pose_style = gr.Slider(
                        minimum=0, maximum=45, step=1, label="Kiểu pose", value=0
                    )
                    facerender = gr.Radio(
                        ["facevid2vid", "pirender"],
                        value="facevid2vid",
                        label="Face render",
                        info="Chọn kiểu render mặt",
                    )
                    exp_weight = gr.Slider(
                        minimum=0,
                        maximum=3,
                        step=0.1,
                        label="Biên độ biểu cảm",
                        value=1,
                    )
                    use_ref_video = gr.Checkbox(label="Dùng video tham chiếu")
                    ref_video = gr.Video(
                        label="Video tham chiếu",
                        elem_id="vidref",
                        height=120,
                        width=120,
                    )
                    ref_info = gr.Radio(
                        ["pose", "blink", "pose+blink", "all"],
                        value="pose",
                        label="Tham chiếu",
                        info="Cách lấy thông tin từ video tham chiếu?",
                    )
                    use_idle_mode = gr.Checkbox(label="Idle Animation")
                    length_of_audio = gr.Number(value=5, label="Độ dài video (giây)")
                    blink_every = gr.Checkbox(label="Chớp mắt", value=True)
            # The generate button starts disabled; enable_generate (below)
            # turns it on once the required inputs are present.
            btn_generate = gr.Button(
                "🔥 Tạo giọng nói & video", elem_id="btn-generate", interactive=False
            )
            # Output widgets updated by generate_voice_and_video.
            with gr.Row(elem_classes="gr-row"):
                output_audio = gr.Audio(
                    label="🎧 Audio đã tạo", type="filepath", show_download_button=True
                )
                gen_video = gr.Video(
                    label="Video đã tạo",
                    format="mp4",
                    scale=1,
                    width=180,
                    show_download_button=True,
                )
                status_box = gr.Textbox(
                    label="Trạng thái tiến trình",
                    interactive=False,
                    value="",
                    visible=True,
                    lines=3,
                    max_lines=4,
                )

            def enable_generate(audio, text, image):
                # Interactive only when sample voice, text to generate and
                # source image are all truthy.
                return gr.update(interactive=bool(audio and text and image))

            # Re-evaluate the button state whenever any required input changes.
            ref_audio.change(
                enable_generate, [ref_audio, gen_text, source_image], btn_generate
            )
            gen_text.change(
                enable_generate, [ref_audio, gen_text, source_image], btn_generate
            )
            source_image.change(
                enable_generate, [ref_audio, gen_text, source_image], btn_generate
            )

        # --- Tab 2: history of videos found by list_videos() ---
        with gr.Tab("Lịch sử video"):
            with gr.Row(elem_classes="gr-row"):
                refresh_btn = gr.Button("🔄 Refresh File List")
                # The first choice is a placeholder entry, not a real path.
                choices = ["Vui lòng chọn video"] + list_videos()
                video_list = gr.Dropdown(
                    value="Vui lòng chọn video",
                    choices=choices,
                    label="Chọn video để xem",
                    interactive=True,
                    scale=1,
                )
                video_player = gr.Video(
                    height=180,
                    width=180,
                    label="Video lịch sử",
                    scale=1,
                    show_download_button=True,
                )
            # Refresh re-reads the results directory and resets the selection
            # to the placeholder.
            refresh_btn.click(
                fn=lambda: gr.update(
                    choices=["Vui lòng chọn video"] + list_videos(),
                    value="Vui lòng chọn video",
                ),
                outputs=video_list,
            )
            # The selected dropdown value is passed straight to the player.
            # NOTE(review): this includes the placeholder string when it is the
            # selected value - confirm the player handles a non-path value.
            video_list.change(lambda x: x, inputs=video_list, outputs=video_player)
        # --- Tab 3: debug helper that lists a directory ---
        # NOTE(review): list_files is called with whatever path is typed here,
        # so any user of the UI can list server directories - confirm this tab
        # is intended outside local debugging.
        with gr.Tab("Debug"):
            # NOTE(review): a second gr.Blocks is opened inside the outer one,
            # and `demo` is not used again in this function - presumably a
            # plain container was intended; verify before changing.
            with gr.Blocks() as demo:
                directory_input = gr.Textbox(label="Enter Directory Path", value=".")
                file_list_output = gr.Textbox(label="Files", lines=10)

            directory_input.change(
                fn=list_files, inputs=directory_input, outputs=file_list_output
            )

        # Wired after all tabs are built because the outputs include
        # video_list, which is created in the history tab. The inputs list is
        # in the same order as generate_voice_and_video's parameters.
        btn_generate.click(
            generate_voice_and_video,
            inputs=[
                ref_audio,
                ref_text,
                gen_text,
                speed,
                source_image,
                preprocess_type,
                is_still_mode,
                enhancer,
                batch_size,
                size_of_image,
                pose_style,
                facerender,
                exp_weight,
                use_ref_video,
                ref_video,
                ref_info,
                use_idle_mode,
                length_of_audio,
                blink_every,
            ],
            outputs=[output_audio, gen_video, status_box, video_list],
        )
    return sadtalker_interface


# Script entry point: build the interface, enable queuing, then serve it.
if __name__ == "__main__":
    # Building the interface also runs download_model() inside sadtalker_demo.
    demo = sadtalker_demo()
    # Enable Gradio's queue with max_size=10 and api_open=True.
    demo.queue(max_size=10, api_open=True)
    # server_name="0.0.0.0" binds to all network interfaces, not just localhost.
    demo.launch(debug=True, server_name="0.0.0.0")