Elea Zhong committed
Commit cb0b907 · 1 parent: 1ae3ff6

fp8 experiment

app.py CHANGED
@@ -17,8 +17,8 @@ from safetensors.torch import load_file
 from torchao.quantization import quantize_
 from torchao.quantization import Int8WeightOnlyConfig
 
-from qwenimage.debug import ftimed
-from qwenimage.experiments.experiments_qwen import Qwen_FA3_AoT_int8
+from qwenimage.debug import ctimed, ftimed
+from qwenimage.experiments.experiments_qwen import Qwen_FA3_AoT_fp8, Qwen_FA3_AoT_int8, QwenBaseExperiment
 from qwenimage.optimization import optimize_pipeline_
 from qwenimage.prompt import build_camera_prompt
 from qwenimage.models.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
@@ -30,6 +30,7 @@ dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 exp = Qwen_FA3_AoT_int8()
+# exp = Qwen_FA3_AoT_fp8()
 exp.load()
 exp.optimize()
 pipe = exp.pipe
@@ -55,30 +56,31 @@ def infer_camera_edit(
     prev_output = None,
     progress=gr.Progress(track_tqdm=True)
 ):
-    prompt = build_camera_prompt(rotate_deg, move_forward, vertical_tilt, wideangle)
-    print(f"Generated Prompt: {prompt}")
-
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator(device=device).manual_seed(seed)
-
-    # Choose input image (prefer uploaded, else last output)
-    pil_images = []
-    if image is not None:
-        if isinstance(image, Image.Image):
-            pil_images.append(image.convert("RGB"))
-        elif hasattr(image, "name"):
-            pil_images.append(Image.open(image.name).convert("RGB"))
-    elif prev_output:
-        pil_images.append(prev_output.convert("RGB"))
-
-    if len(pil_images) == 0:
-        raise gr.Error("Please upload an image first.")
-
-    print(f"{len(pil_images)=}")
+    with ctimed("pre pipe"):
+        prompt = build_camera_prompt(rotate_deg, move_forward, vertical_tilt, wideangle)
+        print(f"Generated Prompt: {prompt}")
+
+        if randomize_seed:
+            seed = random.randint(0, MAX_SEED)
+        generator = torch.Generator(device=device).manual_seed(seed)
+
+        # Choose input image (prefer uploaded, else last output)
+        pil_images = []
+        if image is not None:
+            if isinstance(image, Image.Image):
+                pil_images.append(image.convert("RGB"))
+            elif hasattr(image, "name"):
+                pil_images.append(Image.open(image.name).convert("RGB"))
+        elif prev_output:
+            pil_images.append(prev_output.convert("RGB"))
+
+        if len(pil_images) == 0:
+            raise gr.Error("Please upload an image first.")
+
+        print(f"{len(pil_images)=}")
 
-    if prompt == "no camera movement":
-        return image, seed, prompt
+        if prompt == "no camera movement":
+            return image, seed, prompt
     result = pipe(
         image=pil_images,
         prompt=prompt,
@@ -154,7 +156,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
         seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
         randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
         true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
-        num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)
+        num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=3)
         height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
         width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
 
@@ -202,6 +204,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
 
 
     # Live updates
+    @ftimed
    def maybe_infer(is_reset, progress=gr.Progress(track_tqdm=True), *args):
        if is_reset:
            return gr.update(), gr.update(), gr.update(), gr.update()
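
Note: app.py now wraps the pre-pipeline work in `with ctimed("pre pipe"):` and decorates `maybe_infer` with `@ftimed`, but the timing helpers themselves live in `qwenimage.debug` and are not shown in this commit. A minimal sketch of what such a context manager and decorator typically look like; this is an assumption for illustration, not the repo's actual implementation:

# Hypothetical sketch of qwenimage.debug timing helpers (assumed, not from this commit).
import functools
import time
from contextlib import contextmanager

@contextmanager
def ctimed(label: str):
    """Print how long the wrapped block took."""
    start = time.perf_counter()
    try:
        yield
    finally:
        print(f"[{label}] {time.perf_counter() - start:.3f}s")

def ftimed(fn):
    """Print how long each call to `fn` took."""
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return fn(*args, **kwargs)
        finally:
            print(f"[{fn.__qualname__}] {time.perf_counter() - start:.3f}s")
    return wrapper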
qwenimage/experiments/experiments_qwen.py CHANGED
@@ -10,7 +10,7 @@ import torch
 from PIL import Image
 import pandas as pd
 from spaces.zero.torch.aoti import ZeroGPUCompiledModel, ZeroGPUWeights
-from torchao.quantization import Float8WeightOnlyConfig, Int4WeightOnlyConfig, Int8DynamicActivationInt4WeightConfig, Int8DynamicActivationInt8WeightConfig, quantize_
+from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, Float8WeightOnlyConfig, Int4WeightOnlyConfig, Int8DynamicActivationInt4WeightConfig, Int8DynamicActivationInt8WeightConfig, quantize_
 from torchao.quantization import Int8WeightOnlyConfig
 import spaces
 import torch
@@ -238,7 +238,7 @@ class Qwen_FA3_AoT_int8(QwenBaseExperiment):
         )
 
 
-@ExperimentRegistry.register(name="qwen_fp8")
+# @ExperimentRegistry.register(name="qwen_fp8")
 class Qwen_fp8(QwenBaseExperiment):
     @ftimed
     def optimize(self):
@@ -247,7 +247,7 @@ class Qwen_fp8(QwenBaseExperiment):
         quantize_(self.pipe.transformer, Float8WeightOnlyConfig())
 
 
-@ExperimentRegistry.register(name="qwen_int8")
+# @ExperimentRegistry.register(name="qwen_int8")
 class Qwen_int8(QwenBaseExperiment):
     @ftimed
     def optimize(self):
@@ -255,3 +255,58 @@ class Qwen_int8(QwenBaseExperiment):
         self.pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
         quantize_(self.pipe.transformer, Int8WeightOnlyConfig())
 
+
+
+
+@ExperimentRegistry.register(name="qwen_fa3_aot_fp8")
+class Qwen_FA3_AoT_fp8(QwenBaseExperiment):
+    @ftimed
+    @spaces.GPU()
+    def optimize(self):
+        self.pipe.transformer.__class__ = QwenImageTransformer2DModel
+        self.pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
+        pipe_kwargs={
+            "image": [Image.new("RGB", (1024, 1024))],
+            "prompt":"prompt",
+            "num_inference_steps":4
+        }
+        suffix="_fa3"
+
+        cache_compiled=self.config.cache_compiled
+
+        transformer_pt2_cache_path = f"checkpoints/transformer_fp8{suffix}_archive.pt2"
+        transformer_weights_cache_path = f"checkpoints/transformer_fp8{suffix}_weights.pt"
+
+        print(f"original model size: {get_model_size_in_bytes(self.pipe.transformer) / 1024 / 1024} MB")
+        quantize_(self.pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
+        print_first_param(self.pipe.transformer)
+        print(f"quantized model size: {get_model_size_in_bytes(self.pipe.transformer) / 1024 / 1024} MB")
+
+        inductor_config = INDUCTOR_CONFIGS
+
+        if os.path.isfile(transformer_pt2_cache_path) and cache_compiled:
+            drain_module_parameters(self.pipe.transformer)
+            zerogpu_weights = torch.load(transformer_weights_cache_path, weights_only=False)
+            compiled_transformer = ZeroGPUCompiledModel(transformer_pt2_cache_path, zerogpu_weights)
+        else:
+            with spaces.aoti_capture(self.pipe.transformer) as call:
+                self.pipe(**pipe_kwargs)
+
+            dynamic_shapes = tree_map(lambda t: None, call.kwargs)
+            dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES
+
+            exported = torch.export.export(
+                mod=self.pipe.transformer,
+                args=call.args,
+                kwargs=call.kwargs,
+                dynamic_shapes=dynamic_shapes,
+            )
+
+            compiled_transformer = spaces.aoti_compile(exported, inductor_config)
+            with open(transformer_pt2_cache_path, "wb") as f:
+                f.write(compiled_transformer.archive_file.getvalue())
+            torch.save(compiled_transformer.weights, transformer_weights_cache_path)
+
+
+        aoti_apply(compiled_transformer, self.pipe.transformer)
+
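
Note: the new `Qwen_FA3_AoT_fp8` experiment swaps the int8 weight-only config for torchao's `Float8DynamicActivationFloat8WeightConfig`, which rewrites the transformer's Linear weights to fp8 in place and quantizes activations dynamically at matmul time, before handing the module to AoT export. A minimal sketch of that quantization step in isolation (the toy model and shapes are illustrative only; fp8 kernels generally need a recent GPU such as Hopper or Ada):

# Minimal sketch of fp8 dynamic-activation / fp8-weight quantization with torchao,
# isolated from the experiment class. The toy model below is an assumption for
# illustration, not the pipeline's actual transformer.
import torch
from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, quantize_

model = torch.nn.Sequential(
    torch.nn.Linear(4096, 4096),
    torch.nn.GELU(),
    torch.nn.Linear(4096, 4096),
).to(torch.bfloat16).cuda()

# quantize_ mutates the module in place: Linear weights become fp8 tensors,
# and activations are quantized dynamically right before each matmul.
quantize_(model, Float8DynamicActivationFloat8WeightConfig())

x = torch.randn(1, 4096, dtype=torch.bfloat16, device="cuda")
with torch.no_grad():
    y = model(x)

The surrounding caching logic in the diff (`checkpoints/transformer_fp8_fa3_archive.pt2` / `_weights.pt`) only reuses a compiled archive when `config.cache_compiled` is set and the file exists, so the capture-export-compile path runs on the first launch.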
qwenimage/models/pipeline_qwenimage_edit_plus.py CHANGED
@@ -521,6 +521,7 @@ class QwenImageEditPlusPipeline(DiffusionPipeline, QwenImageLoraLoaderMixin):
 
     @torch.no_grad()
     @replace_example_docstring(EXAMPLE_DOC_STRING)
+    @ftimed
     def __call__(
         self,
         image: Optional[PipelineImageInput] = None,
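
Note: `@ftimed` sits closest to `def __call__`, so it wraps the raw method and is itself wrapped by `@torch.no_grad()` and the docstring decorator, since decorators apply bottom-up. A toy illustration of that ordering (the names below are made up for the example):

# Toy illustration of decorator ordering; `outer`/`inner` are hypothetical.
def outer(fn):
    def wrapped(*args, **kwargs):
        print("outer before")
        result = fn(*args, **kwargs)
        print("outer after")
        return result
    return wrapped

def inner(fn):
    def wrapped(*args, **kwargs):
        print("inner (closest to def) runs innermost")
        return fn(*args, **kwargs)
    return wrapped

@outer
@inner
def call():
    print("body")

call()
# prints: outer before / inner (closest to def) runs innermost / body / outer after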
qwenimage/optimization.py CHANGED
@@ -117,3 +117,4 @@ def optimize_pipeline_(
 
 
     aoti_apply(compiled_transformer, pipeline.transformer)
+
scripts/plot_data.ipynb CHANGED
The diff for this file is too large to render; see the raw diff.