thankfulcarp committed
Commit ae27ee5 · 1 Parent(s): ed32595

Changed to loading LLM at start again.

Files changed (1):
  1. app.py (+32, -30)
app.py CHANGED

@@ -12,6 +12,7 @@ import json
 import random
 import tempfile
 import traceback
+from functools import partial

 import gradio as gr
 import numpy as np
@@ -219,7 +220,7 @@ def _manage_lora_state(pipe, selected_lora: str, lora_weight: float) -> bool:

 def load_pipelines():
     """Loads and configures the T2V and LLM pipelines."""
-    t2v_pipe = None
+    t2v_pipe, enhancer_pipe = None, None

     print("\n🚀 Loading T2V pipeline with base LoRA...")
     try:
@@ -248,40 +249,38 @@ def load_pipelines():
         traceback.print_exc()
         t2v_pipe = None

-    # The enhancer pipeline is now loaded on-demand inside its decorated function.
-    return t2v_pipe
-
-
-# --- 5. Core Generation & UI Logic ---
-
-ENHANCER_PIPE_CACHE = None  # Global cache for the LLM pipeline
+    print("\n🤖 Loading LLM for Prompt Enhancement...")
+    try:
+        # In a ZeroGPU environment, we must load models on the CPU at startup.
+        # The model will be moved to the GPU inside the decorated function.
+        enhancer_pipe = pipeline("text-generation", model=ENHANCER_MODEL_ID, torch_dtype=torch.bfloat16, device="cpu")
+        print("✅ LLM Prompt Enhancer loaded successfully (on CPU).")
+    except Exception as e:
+        print(f"⚠️ WARNING: Could not load the LLM prompt enhancer. The feature will be disabled. Error: {e}")
+        enhancer_pipe = None
+
+    return t2v_pipe, enhancer_pipe
+
+
+# --- 5. Core Generation & UI Logic ---

 @spaces.GPU()
-def enhance_prompt_with_llm(prompt: str):
+def enhance_prompt_with_llm(prompt: str, enhancer_pipeline):
     """
-    Uses a cached LLM to enhance a given prompt.
-    In a ZeroGPU environment, the model is loaded on the first call.
+    Uses the loaded LLM to enhance a given prompt.
     """
-    global ENHANCER_PIPE_CACHE
-    if ENHANCER_PIPE_CACHE is None:
-        print("\n🤖 Loading LLM for Prompt Enhancement (first run)...")
-        try:
-            # This happens inside the GPU session, so device_map="auto" is correct.
-            ENHANCER_PIPE_CACHE = pipeline(
-                "text-generation",
-                model=ENHANCER_MODEL_ID,
-                torch_dtype=torch.bfloat16,
-                device_map="auto"
-            )
-            print("✅ LLM Prompt Enhancer loaded successfully.")
-        except Exception as e:
-            print(f"❌ Error loading LLM enhancer: {e}")
-            raise gr.Error("Could not load the AI prompt enhancer. Please check the logs.")
+    if enhancer_pipeline is None:
+        print("LLM enhancer not available, returning original prompt.")
+        gr.Warning("LLM enhancer is not available.")
+        return prompt
+
+    # Move the entire pipeline to the GPU. This handles the model, tokenizer, and device settings.
+    enhancer_pipeline.to("cuda")

     messages = [{"role": "system", "content": T2V_CINEMATIC_PROMPT_SYSTEM}, {"role": "user", "content": prompt}]
     print(f"Enhancing prompt: '{prompt}'")
     try:
-        outputs = ENHANCER_PIPE_CACHE(messages, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.95)
+        outputs = enhancer_pipeline(messages, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.95)
         final_answer = outputs[0]['generated_text'][-1]['content']
         print(f"Enhanced prompt: '{final_answer.strip()}'")
         return final_answer.strip()
@@ -370,7 +369,7 @@ def generate_t2v_video(

 # --- 6. Gradio UI Layout ---

-def build_ui(t2v_pipe, available_loras):
+def build_ui(t2v_pipe, enhancer_pipe, available_loras):
     """Creates and configures the Gradio UI."""
     with gr.Blocks(theme=gr.themes.Soft(), css=".main-container { max-width: 1080px; margin: auto; }") as demo:
         gr.Markdown("# ✨ Wan 2.1 Text-to-Video Suite with Dynamic LoRAs")
@@ -389,8 +388,8 @@ def build_ui(t2v_pipe, available_loras):
             )
             t2v_enhance_btn = gr.Button(
                 "🤖 Enhance Prompt with AI",
-                # This is now always interactive. Errors are handled inside the click handler.
-                interactive=True
+                # The button is disabled if the enhancer pipeline failed to load
+                interactive=enhancer_pipe is not None
             )

         with gr.Group():
@@ -426,8 +425,11 @@ def build_ui(t2v_pipe, available_loras):
         t2v_download = gr.File(label="📥 Download Video", visible=False)

         if t2v_pipe is not None:
+            # Create a partial function that has the enhancer_pipe "baked in".
+            # This avoids the need to pass the complex object through Gradio's state.
+            enhance_fn = partial(enhance_prompt_with_llm, enhancer_pipeline=enhancer_pipe)
             t2v_enhance_btn.click(
-                fn=enhance_prompt_with_llm,
+                fn=enhance_fn,
                 inputs=[t2v_prompt],
                 outputs=[t2v_prompt]
             )
@@ -450,12 +452,12 @@ def build_ui(t2v_pipe, available_loras):

 # --- 7. Main Execution ---
 if __name__ == "__main__":
-    t2v_pipe = load_pipelines()
+    t2v_pipe, enhancer_pipe = load_pipelines()

     # Fetch LoRAs only if the main pipeline loaded successfully
     available_loras = []
     if t2v_pipe:
         available_loras = get_available_presets(DYNAMIC_LORA_REPO_ID, DYNAMIC_LORA_SUBFOLDER)

-    app_ui = build_ui(t2v_pipe, available_loras)
+    app_ui = build_ui(t2v_pipe, enhancer_pipe, available_loras)
     app_ui.queue(max_size=10).launch()
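
For reference, a minimal sketch of the pattern this commit returns to: load the model on the CPU at startup, move it to the GPU only inside the @spaces.GPU()-decorated handler, and bind the loaded objects into the Gradio callback with functools.partial. The model id and UI below are illustrative placeholders, not this repo's actual ENHANCER_MODEL_ID or layout.

import spaces
import torch
import gradio as gr
from functools import partial
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"  # placeholder model, for illustration only

# Startup code runs without a GPU in a ZeroGPU Space, so everything loads on the CPU here.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)

@spaces.GPU()
def enhance(prompt: str, model, tokenizer) -> str:
    # A GPU is attached only while this decorated function runs, so the move happens here.
    model.to("cuda")
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    out = model.generate(**inputs, max_new_tokens=64, do_sample=True, temperature=0.7)
    return tokenizer.decode(out[0], skip_special_tokens=True)

with gr.Blocks() as demo:
    box = gr.Textbox(label="Prompt")
    btn = gr.Button("Enhance")
    # Bake the heavyweight objects into the handler instead of routing them through
    # Gradio components, mirroring the partial() wiring used in app.py.
    btn.click(fn=partial(enhance, model=model, tokenizer=tokenizer), inputs=[box], outputs=[box])

demo.queue().launch()

Compared with the previous on-demand loading, this arrangement fails fast at startup and lets the UI disable the enhance button when the load did not succeed, which is what the interactive=enhancer_pipe is not None change does.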