import gradio as gr
import numpy as np
import random
import torch
import spaces
import math
import os
import json
import base64
import tempfile
import time  # Added for history update delay
from io import BytesIO
from PIL import Image
from diffusers import FlowMatchEulerDiscreteScheduler
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
from gradio_client import Client, handle_file

from optimization import optimize_pipeline_
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3

# --- Model Loading ---
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    transformer=QwenImageTransformer2DModel.from_pretrained(
        "linoyts/Qwen-Image-Edit-Rapid-AIO",
        subfolder="transformer",
        torch_dtype=dtype,
        device_map="cuda",
    ),
    torch_dtype=dtype,
).to(device)

pipe.load_lora_weights(
    "dx8152/Qwen-Edit-2509-Multiple-angles",
    weight_name="镜头转换.safetensors",
    adapter_name="angles",
)
pipe.load_lora_weights(
    "lovis93/next-scene-qwen-image-lora-2509",
    weight_name="next-scene_lora-v2-3000.safetensors",
    adapter_name="next-scene",
)

pipe.set_adapters(["angles", "next-scene"], adapter_weights=[1.0, 1.0])
pipe.fuse_lora(adapter_names=["angles"], lora_scale=1.0)
pipe.fuse_lora(adapter_names=["next-scene"], lora_scale=1.0)
pipe.unload_lora_weights()

# Apply the same optimizations from the first version
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

# --- Ahead-of-time compilation ---
optimize_pipeline_(
    pipe,
    image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))],
    prompt="prompt",
)

# --- UI Constants and Helpers ---
MAX_SEED = np.iinfo(np.int32).max


# --- Build natural language prompt from sliders ---
def build_camera_prompt(rotate_deg, move_lr, move_forward, topdown, wideangle, closeup):
    prompt_parts = []

    # Rotation
    if rotate_deg != 0:
        direction = "left" if rotate_deg > 0 else "right"
        prompt_parts.append(f"Rotate the camera {abs(rotate_deg)} degrees to the {direction}.")

    # Movement
    if move_lr > 0:
        prompt_parts.append("Move the camera left.")
    elif move_lr < 0:
        prompt_parts.append("Move the camera right.")

    if move_forward > 0:
        prompt_parts.append("Move the camera forward.")
    elif move_forward < 0:
        prompt_parts.append("Move the camera backward.")

    # Lens / perspective options
    if topdown:
        prompt_parts.append("Turn the camera to a top-down view.")
    if wideangle:
        prompt_parts.append("Turn the camera to a wide-angle lens.")
    if closeup:
        prompt_parts.append("Turn the camera to a close-up lens.")

    final_prompt = " ".join(prompt_parts).strip()
    return final_prompt if final_prompt else "No camera movement."
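
# Quick sanity check of the prompt builder (illustrative only; the values below are
# arbitrary examples, not part of the app flow):
#
#   build_camera_prompt(45, 0, 3, False, True, False)
#   # -> "Rotate the camera 45 degrees to the left. Move the camera forward. Turn the camera to a wide-angle lens."
#
#   build_camera_prompt(0, 0, 0, False, False, False)
#   # -> "No camera movement."
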
# --- Main inference function (unchanged backend) ---
@spaces.GPU(duration=300)
def infer_camera_edit(
    image,
    prev_output,
    rotate_deg,
    move_lr,
    move_forward,
    topdown,
    wideangle,
    closeup,
    seed,
    randomize_seed,
    true_guidance_scale,
    num_inference_steps,
    height,
    width,
):
    prompt = build_camera_prompt(rotate_deg, move_lr, move_forward, topdown, wideangle, closeup)
    print(f"Generated Prompt: {prompt}")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device=device).manual_seed(seed)

    # Use previous output if no new image uploaded
    pil_images = []
    if image is not None:
        if isinstance(image, Image.Image):
            pil_images.append(image.convert("RGB"))
        elif hasattr(image, "name"):
            pil_images.append(Image.open(image.name).convert("RGB"))
    elif prev_output is not None:
        pil_images.append(prev_output.convert("RGB"))

    if len(pil_images) == 0:
        raise gr.Error("Please upload an image first.")

    result = pipe(
        image=pil_images,
        prompt=prompt,
        height=height if height != 0 else None,
        width=width if width != 0 else None,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images[0]

    return result, seed, prompt


# --- Gradio UI ---
css = '''
#col-container { max-width: 800px; margin: 0 auto; }
'''

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## 🎬 Qwen Image Edit — Camera Angle Control")
        gr.Markdown(
            "Edit the same image from multiple camera angles using Qwen Edit and the "
            "'Multiple Angles' LoRA. Each edit applies to the latest output for fluid camera movement."
        )

        with gr.Row():
            with gr.Column():
                image = gr.Image(label="Input Image", type="pil", sources=["upload"])
                prev_output = gr.State(value=None)

                with gr.Tab("Camera Controls"):
                    rotate_deg = gr.Slider(
                        label="Rotate Left–Right (°)", minimum=-90, maximum=90, step=45, value=0
                    )
                    move_lr = gr.Slider(label="Move Right–Left", minimum=-10, maximum=10, step=1, value=0)
                    move_forward = gr.Slider(label="Move Forward/Backward", minimum=-10, maximum=10, step=1, value=0)
                    topdown = gr.Checkbox(label="Top-Down View", value=False)
                    wideangle = gr.Checkbox(label="Wide-Angle Lens", value=False)
                    closeup = gr.Checkbox(label="Close-Up Lens", value=False)

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                    true_guidance_scale = gr.Slider(
                        label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0
                    )
                    num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)
                    height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
                    width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)

                with gr.Row():
                    reset_btn = gr.Button("Reset")
                    run_btn = gr.Button("Generate", variant="primary")

            with gr.Column():
                result = gr.Image(label="Output Image")
                prompt_preview = gr.Textbox(label="Generated Prompt", interactive=False)
                gr.Markdown("_Each change applies a fresh camera instruction to the last output image._")

    # Define inputs & outputs
    inputs = [
        image, prev_output, rotate_deg, move_lr, move_forward,
        topdown, wideangle, closeup, seed, randomize_seed,
        true_guidance_scale, num_inference_steps, height, width,
    ]
    outputs = [result, seed, prompt_preview]

    def reset_all():
        return [0, 0, 0, False, False, False]

    reset_btn.click(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, move_lr, move_forward, topdown, wideangle, closeup],
        queue=False,
    )
    run_event = run_btn.click(
        fn=infer_camera_edit,
        inputs=inputs,
        outputs=outputs,
    )

    # Live updates on control release
    for control in [rotate_deg, move_lr, move_forward, topdown, wideangle, closeup]:
        control.change(
            fn=infer_camera_edit,
            inputs=inputs,
            outputs=outputs,
            show_progress="minimal",
        )

    # Save latest output as next input
    run_event.then(lambda img, *_: img, inputs=outputs, outputs=[prev_output])

demo.launch()
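
# For a quick headless check outside the UI (a sketch only; it assumes a local file
# "input.jpg" exists and a CUDA device is available), the backend can be called
# directly, e.g.:
#
#   img = Image.open("input.jpg")
#   out, used_seed, prompt = infer_camera_edit(
#       img, None, 45, 0, 0, False, False, False,
#       seed=0, randomize_seed=True, true_guidance_scale=1.0,
#       num_inference_steps=4, height=1024, width=1024,
#   )
#   out.save("output.png")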