Elea Zhong committed
Commit db0f5c9 · 1 parent: f7e5a2f

update app

Files changed (1): app.py (+20, −16)
app.py CHANGED
@@ -72,22 +72,26 @@ pipe.set_adapters(["fast_5k"], adapter_weights=[1.0])
 pipe.fuse_lora(adapter_names=["fast_5k"], lora_scale=1.0)
 pipe.unload_lora_weights()
 
-pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
-pipe.transformer.fuse_qkv_projections()
-pipe.transformer.check_fused_qkv()
-
-optimize_pipeline_(
-    pipe,
-    cache_compiled=True,
-    quantize=True,
-    suffix="_fp8darow_nolast_fa3_fast5k",
-    quantize_config=conf_fp8darow_nolast(),
-    pipe_kwargs={
-        "image": [Image.new("RGB", (1024, 1024))],
-        "prompt":"prompt",
-        "num_inference_steps":2,
-    }
-)
+@spaces.GPU(duration=1500)
+def optim_pipe():
+    print(f"func cuda: {torch.cuda.is_available()=}")
+
+    pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
+    pipe.transformer.fuse_qkv_projections()
+    pipe.transformer.check_fused_qkv()
+
+    optimize_pipeline_(
+        pipe,
+        cache_compiled=True,
+        quantize=True,
+        suffix="_fp8darow_nolast_fa3_fast5k",
+        quantize_config=conf_fp8darow_nolast(),
+        pipe_kwargs={
+            "image": [Image.new("RGB", (1024, 1024))],
+            "prompt":"prompt",
+            "num_inference_steps":2,
+        }
+    )
 
 MAX_SEED = np.iinfo(np.int32).max
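Note on the change: the FA3 attention-processor swap, the QKV fusion, and the compile/quantize pass in optimize_pipeline_ now run inside optim_pipe(), decorated with @spaces.GPU(duration=1500). On Hugging Face ZeroGPU Spaces, module-level code in app.py executes without a GPU attached; the spaces.GPU decorator schedules the wrapped call on a GPU worker (here reserved for up to 1500 seconds), so GPU-dependent warm-up work has to live inside such a function. A minimal sketch of the pattern, assuming the standard spaces package on a ZeroGPU Space (the warm_up name and the startup call are illustrative, not from this commit):

    import spaces
    import torch

    @spaces.GPU(duration=1500)  # reserve a GPU for up to 1500 s per call
    def warm_up():
        # CUDA should be visible inside the GPU-scheduled call, which is
        # what the print in the committed optim_pipe() is checking.
        print(f"cuda: {torch.cuda.is_available()=}")

    # The decorator takes effect only when the function is invoked, so the
    # app still needs to call it (e.g. once at startup) before serving.
    warm_up()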