self-forcing

Runtime error

App Files Community

innoai committed on Jun 19

Commit

bd4727a

verified ·

1 Parent(s): a0f21c7

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -7

app.py CHANGED Viewed

@@ -151,6 +151,24 @@ APP_STATE = {
 # Global variable to store generated video chunks
 generated_video_chunks = []
 def frames_to_ts_file(frames, filepath, fps = 15):
     """
     Convert frames directly to .ts file using PyAV.
@@ -309,10 +327,10 @@ pipeline.to(dtype=torch.float16).to(gpu)
 @torch.no_grad()
 @spaces.GPU
-def video_generation_handler_streaming(prompt, seed=42, fps=15):
     """
     Generator function that yields .ts video chunks using PyAV for streaming.
-    Now optimized for block-based processing.
     """
     global generated_video_chunks
     generated_video_chunks = []  # Reset chunks for new generation
@@ -320,7 +338,13 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
     if seed == -1:
         seed = random.randint(0, 2**32 - 1)
-    print(f"🎬 Starting PyAV streaming: '{prompt}', seed: {seed}")
     # Setup
     conditional_dict = text_encoder(text_prompts=[prompt])
@@ -330,7 +354,9 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
     rnd = torch.Generator(gpu).manual_seed(int(seed))
     pipeline._initialize_kv_cache(1, torch.float16, device=gpu)
     pipeline._initialize_crossattn_cache(1, torch.float16, device=gpu)
-    noise = torch.randn([1, 21, 16, 60, 104], device=gpu, dtype=torch.float16, generator=rnd)
     vae_cache, latents_cache = None, None
     if not APP_STATE["current_use_taehv"] and not args.trt:
@@ -464,7 +490,7 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
     if all_frames_for_download:
         try:
             mp4_uuid = str(uuid.uuid4())[:8]
-            mp4_filename = f"generated_video_{mp4_uuid}.mp4"
             mp4_path = os.path.join("gradio_tmp", mp4_filename)
             frames_to_mp4_file(all_frames_for_download, mp4_path, fps)
             final_mp4_path = mp4_path
@@ -486,7 +512,7 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
         f"      📊 Generated {total_frames_yielded} frames across {num_blocks} blocks"
         f"    </p>"
         f"    <p style='margin: 4px 0 0 0; color: #0f5132; font-size: 14px;'>"
-        f"      🎬 Playback: {fps} FPS • 📁 Format: MPEG-TS/H.264"
         f"    </p>"
         f"  </div>"
         f"</div>"
@@ -533,6 +559,14 @@ with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
                     info="Use -1 for random seed",
                     precision=0
                 )
                 fps = gr.Slider(
                     label="Playback FPS",
                     minimum=1,
@@ -585,7 +619,7 @@ with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
     # Connect the generator to the streaming video
     generation_event = start_btn.click(
         fn=video_generation_handler_streaming,
-        inputs=[prompt, seed, fps],
         outputs=[streaming_video, status_display, complete_video, download_file]
     )

 # Global variable to store generated video chunks
 generated_video_chunks = []
+# Video aspect ratio configurations
+ASPECT_RATIOS = {
+    "16:9": {
+        "width": 832,
+        "height": 468,
+        "latent_w": 104,
+        "latent_h": 60,
+        "display_name": "16:9 (Landscape)"
+    },
+    "9:16": {
+        "width": 468,
+        "height": 832,
+        "latent_w": 60,
+        "latent_h": 104,
+        "display_name": "9:16 (Portrait)"
+    }
+}
 def frames_to_ts_file(frames, filepath, fps = 15):
     """
     Convert frames directly to .ts file using PyAV.
 @torch.no_grad()
 @spaces.GPU
+def video_generation_handler_streaming(prompt, seed=42, fps=15, aspect_ratio="16:9"):
     """
     Generator function that yields .ts video chunks using PyAV for streaming.
+    Now optimized for block-based processing with aspect ratio support.
     """
     global generated_video_chunks
     generated_video_chunks = []  # Reset chunks for new generation
     if seed == -1:
         seed = random.randint(0, 2**32 - 1)
+    # Get aspect ratio configuration
+    ar_config = ASPECT_RATIOS[aspect_ratio]
+    latent_w = ar_config["latent_w"]
+    latent_h = ar_config["latent_h"]
+    print(f"🎬 Starting PyAV streaming: '{prompt}', seed: {seed}, aspect ratio: {aspect_ratio}")
+    print(f"📐 Video dimensions: {ar_config['width']}x{ar_config['height']}, Latent: {latent_w}x{latent_h}")
     # Setup
     conditional_dict = text_encoder(text_prompts=[prompt])
     rnd = torch.Generator(gpu).manual_seed(int(seed))
     pipeline._initialize_kv_cache(1, torch.float16, device=gpu)
     pipeline._initialize_crossattn_cache(1, torch.float16, device=gpu)
+    # Create noise with appropriate dimensions for the aspect ratio
+    noise = torch.randn([1, 21, 16, latent_h, latent_w], device=gpu, dtype=torch.float16, generator=rnd)
     vae_cache, latents_cache = None, None
     if not APP_STATE["current_use_taehv"] and not args.trt:
     if all_frames_for_download:
         try:
             mp4_uuid = str(uuid.uuid4())[:8]
+            mp4_filename = f"generated_video_{mp4_uuid}_{aspect_ratio.replace(':', 'x')}.mp4"
             mp4_path = os.path.join("gradio_tmp", mp4_filename)
             frames_to_mp4_file(all_frames_for_download, mp4_path, fps)
             final_mp4_path = mp4_path
         f"      📊 Generated {total_frames_yielded} frames across {num_blocks} blocks"
         f"    </p>"
         f"    <p style='margin: 4px 0 0 0; color: #0f5132; font-size: 14px;'>"
+        f"      🎬 Playback: {fps} FPS • 📁 Format: MPEG-TS/H.264 • 📐 Aspect Ratio: {aspect_ratio}"
         f"    </p>"
         f"  </div>"
         f"</div>"
                     info="Use -1 for random seed",
                     precision=0
                 )
+                aspect_ratio = gr.Radio(
+                    label="Aspect Ratio",
+                    choices=["16:9", "9:16"],
+                    value="16:9",
+                    info="Choose video aspect ratio"
+                )
+            with gr.Row():
                 fps = gr.Slider(
                     label="Playback FPS",
                     minimum=1,
     # Connect the generator to the streaming video
     generation_event = start_btn.click(
         fn=video_generation_handler_streaming,
+        inputs=[prompt, seed, fps, aspect_ratio],
         outputs=[streaming_video, status_display, complete_video, download_file]
     )