cbensimon HF Staff committed on
Commit
c628e2d
·
verified ·
1 Parent(s): 38cc643

Update optimization.py

Browse files
Files changed (1) hide show
  1. optimization.py +25 -10
optimization.py CHANGED
@@ -28,25 +28,40 @@ INDUCTOR_CONFIGS = {
28
 
29
def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
    """Ahead-of-time compile the pipeline's transformer blocks in place.

    Captures one example invocation of the first transformer block, exports
    and AOT-compiles it on a ZeroGPU worker, then patches every block's
    ``forward`` with the compiled model (each block keeps its own weights
    via ``state_dict()``). Modifies ``pipeline`` in place; returns nothing.
    """

    transformer_blocks = pipeline.transformer.transformer_blocks
    first_block = transformer_blocks[0]

    @spaces.GPU(duration=120)
    def _compile_first_block():
        # Record one real call to the first block by running the pipeline once.
        with spaces.aoti_capture(first_block) as call:
            pipeline(*args, **kwargs)

        # Export the block with the captured example inputs, then AOT-compile.
        exported = torch.export.export(mod=first_block, args=call.args, kwargs=call.kwargs)
        return spaces.aoti_compile(exported, INDUCTOR_CONFIGS).archive_file

    pipeline.transformer.fuse_qkv_projections()
    pipeline.transformer.set_attn_processor(FlashFusedFluxAttnProcessor3_0())

    # All blocks share the one compiled artifact; weights stay per-block.
    archive = _compile_first_block()
    for block in transformer_blocks:
        compiled = ZeroGPUCompiledModel(archive, ZeroGPUWeights(block.state_dict()))
        block.forward = compiled
28
 
29
def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
    """Ahead-of-time compile both transformer block types in place.

    Captures one example call for the first block of each block list
    (``transformer_blocks`` and ``single_transformer_blocks``), exports and
    AOT-compiles each on a ZeroGPU worker, then patches every block's
    ``forward`` with the matching compiled model (weights stay per-block
    via ``state_dict()``). Modifies ``pipeline`` in place; returns nothing.
    """

    blocks_A = pipeline.transformer.transformer_blocks
    blocks_B = pipeline.transformer.single_transformer_blocks

    @spaces.GPU(duration=1500)
    def compile_transformer_block_AB():

        # Run the pipeline once per capture so each block type records its
        # own example args/kwargs.
        with spaces.aoti_capture(blocks_A[0]) as call_A:
            pipeline(*args, **kwargs)

        with spaces.aoti_capture(blocks_B[0]) as call_B:
            pipeline(*args, **kwargs)

        exported_A = torch.export.export(
            mod=blocks_A[0],
            # BUG FIX: was `call.args` / `call.kwargs` — no `call` exists in
            # this scope (captures are `call_A` / `call_B`), so this raised
            # NameError. Use the capture that belongs to blocks_A.
            args=call_A.args,
            kwargs=call_A.kwargs,
        )

        exported_B = torch.export.export(
            mod=blocks_B[0],
            # BUG FIX: likewise, blocks_B must be exported with its own
            # captured inputs, not blocks_A's.
            args=call_B.args,
            kwargs=call_B.kwargs,
        )

        return (
            spaces.aoti_compile(exported_A, INDUCTOR_CONFIGS).archive_file,
            spaces.aoti_compile(exported_B, INDUCTOR_CONFIGS).archive_file,
        )

    pipeline.transformer.fuse_qkv_projections()
    pipeline.transformer.set_attn_processor(FlashFusedFluxAttnProcessor3_0())

    archive_file_A, archive_file_B = compile_transformer_block_AB()
    for blocks, archive_file in zip((blocks_A, blocks_B), (archive_file_A, archive_file_B)):
        for block in blocks:
            weights = ZeroGPUWeights(block.state_dict())
            block.forward = ZeroGPUCompiledModel(archive_file, weights)