Spaces:

zerogpu-aoti
/

Flux-Compiled-Graph

Running on Zero

File size: 1,330 Bytes

937a94e
 
 
 
 
 
 
4b0fe46
937a94e
4b0fe46
937a94e
f9f24d7
 
937a94e
4b0fe46
 
937a94e
 
 
4b0fe46
 
 
 
 
 
937a94e
 
4b0fe46
f5a3617
c5db835
 
 
 
 
f9f24d7
c5db835
 
 
4b0fe46
c5db835
 
4b0fe46
968b96f
4b0fe46

from typing import Any
from typing import Callable
from typing import ParamSpec
import spaces
import torch
from torch.utils._pytree import tree_map

P = ParamSpec("P")

TRANSFORMER_HIDDEN_DIM = torch.export.Dim("hidden", min=4096, max=8212)

# Specific to Flux. More about this is available in
# https://huggingface.co/blog/zerogpu-aoti
TRANSFORMER_DYNAMIC_SHAPES = {
    "hidden_states": {1: TRANSFORMER_HIDDEN_DIM},
    "img_ids": {0: TRANSFORMER_HIDDEN_DIM},
}

INDUCTOR_CONFIGS = {
    "conv_1x1_as_mm": True,
    "epilogue_fusion": False,
    "coordinate_descent_tuning": True,
    "coordinate_descent_check_all_directions": True,
    "max_autotune": True,
    "triton.cudagraphs": True,
}


def compile_transformer(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
    """Capture, export and AOT-compile ``pipeline.transformer``.

    The pipeline is invoked once with the supplied example arguments inside
    ``spaces.aoti_capture``; the captured ``args`` / ``kwargs`` are then used
    to export ``pipeline.transformer`` via ``torch.export.export`` and the
    exported program is passed to ``spaces.aoti_compile`` together with
    ``INDUCTOR_CONFIGS``. All of this happens inside a helper decorated with
    ``spaces.GPU(duration=1500)``.

    Args:
        pipeline: Callable pipeline object exposing a ``transformer`` attribute.
        *args: Positional arguments for the example pipeline call.
        **kwargs: Keyword arguments for the example pipeline call.

    Returns:
        The value returned by ``spaces.aoti_compile`` for the exported
        transformer.
    """

    @spaces.GPU(duration=1500)
    def _capture_export_compile():
        # Run the pipeline once so the call made to the transformer is captured.
        with spaces.aoti_capture(pipeline.transformer) as captured:
            pipeline(*args, **kwargs)

        # Start from a spec mirroring the captured kwargs with every leaf set
        # to None, then overlay the inputs that have dynamic axes.
        shape_spec = tree_map(lambda _leaf: None, captured.kwargs)
        shape_spec.update(TRANSFORMER_DYNAMIC_SHAPES)

        exported_program = torch.export.export(
            mod=pipeline.transformer,
            args=captured.args,
            kwargs=captured.kwargs,
            dynamic_shapes=shape_spec,
        )
        return spaces.aoti_compile(exported_program, INDUCTOR_CONFIGS)

    return _capture_export_compile()