thankfulcarp committed
Commit 1d1a8c3 · 1 Parent(s): e4ae032

Enhancer fix

Files changed (1):
  app.py  +27 -36
app.py CHANGED
@@ -12,7 +12,6 @@ import json
 import random
 import tempfile
 import traceback
-from functools import partial

 import gradio as gr
 import numpy as np
@@ -163,7 +162,6 @@ def handle_lora_selection_change(preset_name: str, current_prompt: str):
 def load_pipelines():
     """Loads and configures the T2V and LLM pipelines."""
     t2v_pipe = None
-    enhancer_pipe = None

     print("\n🚀 Loading T2V pipeline with base LoRA...")
     try:
@@ -192,35 +190,35 @@ def load_pipelines():
         traceback.print_exc()
         t2v_pipe = None

-    print("\n🤖 Loading LLM for Prompt Enhancement...")
-    try:
-        # In a ZeroGPU environment, we must load models on the CPU at startup.
-        # The model will be moved to the GPU inside the decorated function.
-        enhancer_pipe = pipeline("text-generation", model=ENHANCER_MODEL_ID, torch_dtype=torch.bfloat16, device="cpu")
-        print("✅ LLM Prompt Enhancer loaded successfully (on CPU).")
-    except Exception as e:
-        print(f"⚠️ WARNING: Could not load the LLM prompt enhancer. The feature will be disabled. Error: {e}")
-        enhancer_pipe = None
-
-    return t2v_pipe, enhancer_pipe
+    # The enhancer pipeline is now loaded on-demand inside its decorated function.
+    return t2v_pipe


 # --- 5. Core Generation & UI Logic ---
-@spaces.GPU()
-def enhance_prompt_with_llm(prompt: str, enhancer_pipeline):
-    """Uses the loaded LLM to enhance a given prompt."""
-    if enhancer_pipeline is None:
-        print("LLM enhancer not available, returning original prompt.")
-        gr.Warning("LLM enhancer is not available.")
-        return prompt

-    # Move the model to the GPU now that we are in a decorated function
-    enhancer_pipeline.model.to("cuda")
+ENHANCER_PIPE_CACHE = None  # Global cache for the LLM pipeline
+
+@spaces.GPU()
+def enhance_prompt_with_llm(prompt: str):
+    """
+    Uses a cached LLM to enhance a given prompt.
+    In a ZeroGPU environment, the model is loaded on the first call.
+    """
+    global ENHANCER_PIPE_CACHE
+    if ENHANCER_PIPE_CACHE is None:
+        print("\n🤖 Loading LLM for Prompt Enhancement (first run)...")
+        try:
+            # This happens inside the GPU session, so device_map="auto" is correct.
+            ENHANCER_PIPE_CACHE = pipeline("text-generation", model=ENHANCER_MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto")
+            print("✅ LLM Prompt Enhancer loaded successfully.")
+        except Exception as e:
+            print(f"❌ Error loading LLM enhancer: {e}")
+            raise gr.Error("Could not load the AI prompt enhancer. Please check the logs.")

     messages = [{"role": "system", "content": T2V_CINEMATIC_PROMPT_SYSTEM}, {"role": "user", "content": prompt}]
     print(f"Enhancing prompt: '{prompt}'")
     try:
-        outputs = enhancer_pipeline(messages, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.95)
+        outputs = ENHANCER_PIPE_CACHE(messages, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.95)
         final_answer = outputs[0]['generated_text'][-1]['content']
         print(f"Enhanced prompt: '{final_answer.strip()}'")
         return final_answer.strip()
@@ -340,13 +338,9 @@ def generate_t2v_video(

 # --- 6. Gradio UI Layout ---

-def build_ui(t2v_pipe, enhancer_pipe, available_loras):
+def build_ui(t2v_pipe, available_loras):
     """Creates and configures the Gradio UI."""
     with gr.Blocks(theme=gr.themes.Soft(), css=".main-container { max-width: 1080px; margin: auto; }") as demo:
-        # We don't use gr.State for the pipeline object because it's not serializable
-        # and causes a deepcopy error with tensors on multiple devices (CPU/GPU).
-        # Instead, we use functools.partial to bind the pipeline to its handler function.
-
         gr.Markdown("# ✨ Wan 2.1 Text-to-Video Suite with Dynamic LoRAs")
         gr.Markdown("Generate videos from text, enhanced by the base `FusionX` LoRA and your choice of dynamic style LoRA.")

@@ -363,8 +357,8 @@ def build_ui(t2v_pipe, enhancer_pipe, available_loras):
         )
         t2v_enhance_btn = gr.Button(
             "🤖 Enhance Prompt with AI",
-            # The button is disabled if the enhancer pipeline failed to load
-            interactive=enhancer_pipe is not None
+            # This is now always interactive. Errors are handled inside the click handler.
+            interactive=True
         )

         with gr.Group():
@@ -400,11 +394,8 @@ def build_ui(t2v_pipe, enhancer_pipe, available_loras):
         t2v_download = gr.File(label="📥 Download Video", visible=False)

         if t2v_pipe is not None:
-            # Create a partial function that has the enhancer_pipe "baked in".
-            # This avoids the need to pass the complex object through Gradio's state.
-            enhance_fn = partial(enhance_prompt_with_llm, enhancer_pipeline=enhancer_pipe)
             t2v_enhance_btn.click(
-                fn=enhance_fn,
+                fn=enhance_prompt_with_llm,
                 inputs=[t2v_prompt],
                 outputs=[t2v_prompt]
             )
@@ -427,12 +418,12 @@ def build_ui(t2v_pipe, enhancer_pipe, available_loras):

 # --- 7. Main Execution ---
 if __name__ == "__main__":
-    t2v_pipe, enhancer_pipe = load_pipelines()
+    t2v_pipe = load_pipelines()

     # Fetch LoRAs only if the main pipeline loaded successfully
     available_loras = []
     if t2v_pipe:
         available_loras = get_available_presets(DYNAMIC_LORA_REPO_ID, DYNAMIC_LORA_SUBFOLDER)

-    app_ui = build_ui(t2v_pipe, enhancer_pipe, available_loras)
+    app_ui = build_ui(t2v_pipe, available_loras)
     app_ui.queue(max_size=10).launch()
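
The pattern this commit adopts, loading the transformers pipeline lazily inside the ZeroGPU-decorated handler and caching it in a module-level global, can be sketched in isolation as follows. This is a minimal sketch, not the Space's actual code: MODEL_ID and SYSTEM_PROMPT are hypothetical placeholders, and only spaces.GPU, transformers.pipeline, and gr.Error are taken from the commit itself.

# Standalone sketch of the on-demand, cached enhancer pattern used in this commit.
import gradio as gr
import spaces
import torch
from transformers import pipeline

MODEL_ID = "Qwen/Qwen2.5-3B-Instruct"  # hypothetical model id, for illustration only
SYSTEM_PROMPT = "Rewrite the user's prompt as a detailed cinematic video description."

_ENHANCER_CACHE = None  # loaded on the first call, reused afterwards


@spaces.GPU()
def enhance(prompt: str) -> str:
    """Load the LLM on first use (inside the GPU session) and enhance the prompt."""
    global _ENHANCER_CACHE
    if _ENHANCER_CACHE is None:
        try:
            # device_map="auto" is safe here because the GPU is attached
            # for the duration of this decorated call.
            _ENHANCER_CACHE = pipeline(
                "text-generation",
                model=MODEL_ID,
                torch_dtype=torch.bfloat16,
                device_map="auto",
            )
        except Exception as e:
            # Surface the failure in the UI instead of disabling the button up front.
            raise gr.Error(f"Could not load the prompt enhancer: {e}")

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    outputs = _ENHANCER_CACHE(messages, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.95)
    # The chat-style pipeline returns the full conversation; the last message is the reply.
    return outputs[0]["generated_text"][-1]["content"].strip()


with gr.Blocks() as demo:
    box = gr.Textbox(label="Prompt")
    gr.Button("Enhance").click(fn=enhance, inputs=[box], outputs=[box])

if __name__ == "__main__":
    demo.launch()

Because the handler is a plain module-level function with no pipeline argument, it can be wired directly to the click event, which is why the functools.partial indirection and the enhancer_pipe plumbing are removed in the diff above.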