Spaces: Running on Zero
Update app_quant_latent.py

app_quant_latent.py (+71 -97)
CHANGED
@@ -252,110 +252,84 @@ import io
 
 logs = []
 latent_gallery = []
-
-
-
-
-
-
-
-
-
-
-
-
-def retrieve_timesteps(
-    scheduler,
-    num_inference_steps: int = None,
-    device: str = None,
-    timesteps: list = None,
-    sigmas: list = None,
-    **kwargs,
-):
-    if timesteps is not None and sigmas is not None:
-        raise ValueError("Only one of timesteps or sigmas can be passed")
-    if timesteps is not None:
-        scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
-        timesteps = scheduler.timesteps
-        num_inference_steps = len(timesteps)
-    elif sigmas is not None:
-        scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs)
-        timesteps = scheduler.timesteps
-        num_inference_steps = len(timesteps)
-    else:
-        scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
-        timesteps = scheduler.timesteps
-    return timesteps, num_inference_steps
+
+import torch
+from PIL import Image
+
+# Global log storage
+
+LOGS = []
+
+def log(msg):
+    LOGS.append(msg)
+    print(msg)
 
 @spaces.GPU
-def generate_image(prompt, height, width, steps, seed):
-
-
-
-
-    # Encode prompt
-    prompt_embeds, negative_prompt_embeds = pipe.encode_prompt(prompt)
-
-    batch_size = len(prompt_embeds)
-    num_images_per_prompt = 1
-    actual_batch_size = batch_size * num_images_per_prompt
-    num_channels_latents = pipe.transformer.in_channels
-
-    # Prepare latents
-    latents = pipe.prepare_latents(
-        actual_batch_size, num_channels_latents, height, width, torch.float32, device, generator
-    )
-
-    # Repeat embeddings for multiple images per prompt
-    if num_images_per_prompt > 1:
-        prompt_embeds = [pe for pe in prompt_embeds for _ in range(num_images_per_prompt)]
-        if pipe.do_classifier_free_guidance and negative_prompt_embeds:
-            negative_prompt_embeds = [npe for npe in negative_prompt_embeds for _ in range(num_images_per_prompt)]
-
-    image_seq_len = (latents.shape[2] // 2) * (latents.shape[3] // 2)
-    mu = calculate_shift(image_seq_len)
-
-    pipe.scheduler.sigma_min = 0.0
-    scheduler_kwargs = {"mu": mu}
-    timesteps, num_inference_steps = retrieve_timesteps(pipe.scheduler, steps, device, **scheduler_kwargs)
-
-    # Denoising loop
-    for i, t in enumerate(timesteps):
-        timestep = t.expand(latents.shape[0])
-        timestep = (1000 - timestep) / 1000
-        t_norm = timestep[0].item()
-        apply_cfg = pipe.do_classifier_free_guidance and pipe.guidance_scale > 0
-
-        if apply_cfg:
-            latent_model_input = latents.to(pipe.transformer.dtype).repeat(2, 1, 1, 1).unsqueeze(2)
-            prompt_input = prompt_embeds + negative_prompt_embeds
-            timestep_input = timestep.repeat(2)
-        else:
-            latent_model_input = latents.to(pipe.transformer.dtype).unsqueeze(2)
-            prompt_input = prompt_embeds
-            timestep_input = timestep
+def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0, return_latents=False):
+    """
+    Generate an image from a prompt.
+    Tries advanced latent-based method; falls back to standard pipeline if anything fails.
+    """
 
-        latent_list = list(latent_model_input.unbind(0))
-        model_out_list = pipe.transformer(latent_list, timestep_input, prompt_input, return_dict=False)[0]
 
-        if apply_cfg:
-            pos_out = model_out_list[:actual_batch_size]
-            neg_out = model_out_list[actual_batch_size:]
-            noise_pred = torch.stack([p + pipe.guidance_scale * (p - n) for p, n in zip(pos_out, neg_out)])
-        else:
-            noise_pred = torch.stack([t.float() for t in model_out_list], 0)
 
-
-
-
+    try:
+        generator = torch.Generator(device).manual_seed(int(seed))
+
+        # Try advanced latent preparation
+        try:
+            batch_size = 1
+            num_channels_latents = getattr(pipe.unet, "in_channels", None)
+            if num_channels_latents is None:
+                raise AttributeError("pipe.unet.in_channels not found, fallback to standard pipeline")
+
+            latents = pipe.prepare_latents(
+                batch_size=batch_size,
+                num_channels=num_channels_latents,
+                height=height,
+                width=width,
+                dtype=torch.float32,
+                device=device,
+                generator=generator
+            )
+            log(f"✅ Latents prepared: {latents.shape}")
+
+            # Generate image using prepared latents
+            output = pipe(
+                prompt=prompt,
+                height=height,
+                width=width,
+                num_inference_steps=steps,
+                guidance_scale=guidance_scale,
+                generator=generator,
+                latents=latents
+            )
+
+        except Exception as e_inner:
+            # If advanced method fails, fallback to standard pipeline
+            log(f"⚠️ Advanced latent method failed: {e_inner}")
+            log("🔄 Falling back to standard pipeline...")
+            output = pipe(
+                prompt=prompt,
+                height=height,
+                width=width,
+                num_inference_steps=steps,
+                guidance_scale=guidance_scale,
+                generator=generator
+            )
+
+        image = output.images[0]
+        log("✅ Inference finished successfully.")
+
+        if return_latents and 'latents' in locals():
+            return image, latents, LOGS
+        else:
+            return image, LOGS
 
-
-
-
-    image = pipe.vae.decode(latents, return_dict=False)[0]
-    image = pipe.image_processor.postprocess(image, output_type="pil")
+    except Exception as e:
+        log(f"❌ Inference failed entirely: {e}")
+        return None, LOGS
 
-    return image, None, None
 
 
 # ============================================================
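For readers wiring this into the rest of the Space: after this change, generate_image returns either (image, LOGS) or, when return_latents=True and latent preparation succeeded, (image, latents, LOGS); if inference fails entirely it returns (None, LOGS). The sketch below shows one way a caller might unpack that. It is only an illustration, assuming generate_image and the globals it relies on (pipe, device, LOGS) are in scope as defined in app_quant_latent.py; the helper name run_and_report is hypothetical and not part of the commit.

# Hypothetical caller for the updated generate_image() above (not part of
# the commit). Assumes generate_image and its globals (pipe, device, LOGS)
# are already defined earlier in app_quant_latent.py as shown in this diff.
def run_and_report(prompt, steps=20, seed=0, want_latents=False):
    result = generate_image(
        prompt,
        height=512,
        width=512,
        steps=steps,
        seed=seed,
        guidance_scale=0.0,
        return_latents=want_latents,
    )

    # generate_image returns (image, logs) or (image, latents, logs);
    # on total failure it returns (None, logs).
    if len(result) == 3:
        image, latents, logs = result
    else:
        image, logs = result
        latents = None

    for line in logs:
        print(line)
    if image is None:
        print("Generation failed; see the log lines above.")
    return image, latents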