PL committed on
Commit
acfc65c
·
1 Parent(s): 4ebae5f
Files changed (14) hide show
  1. flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/config-250831.030550.yaml +260 -0
  2. flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/branch.pt +1 -0
  3. flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/model.pt +3 -0
  4. flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/scale.pt +3 -0
  5. flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/smooth.pt +1 -0
  6. flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/wgts.pt +3 -0
  7. flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/run-250831.030550.log +0 -0
  8. flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/config-250828.070127.yaml +236 -0
  9. flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/branch.pt +1 -0
  10. flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/model.pt +3 -0
  11. flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/scale.pt +3 -0
  12. flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/smooth.pt +1 -0
  13. flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/wgts.pt +3 -0
  14. flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/run-250828.070127.log +0 -0
flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/config-250831.030550.yaml ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ enable_cache: true
2
+ cache:
3
+ root: runs
4
+ output:
5
+ root: runs
6
+ dirname: skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000
7
+ job: run
8
+ pipeline:
9
+ name: flux.1-dev-ghibli
10
+ path: /home/pingzhi/checkpoints/fused_flux_dev_studio_ghibli_bf16
11
+ dtype: torch.bfloat16
12
+ device: cuda
13
+ shift_activations: false
14
+ enable_lora: false
15
+ task: text-to-image
16
+ eval:
17
+ protocol: fmeuler50-g3.5
18
+ num_gpus: 1
19
+ batch_size: 1
20
+ batch_size_per_gpu: 1
21
+ height: null
22
+ width: null
23
+ clean_caption: null
24
+ num_steps: 50
25
+ guidance_scale: 3.5
26
+ num_samples: 5000
27
+ benchmarks:
28
+ - MJHQ
29
+ - DCI
30
+ gt_metrics:
31
+ - clip_iqa
32
+ - clip_score
33
+ - image_reward
34
+ - fid
35
+ ref_metrics:
36
+ - psnr
37
+ - lpips
38
+ - ssim
39
+ - fid
40
+ gen_root: '{output}/{job}'
41
+ ref_root: /data/pingzhi/deepcompressor/examples/diffusion/baselines/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5
42
+ gt_stats_root: benchmarks/stats
43
+ control_root: benchmarks
44
+ chunk_start: 0
45
+ chunk_step: 1
46
+ chunk_only: false
47
+ quant:
48
+ wgts:
49
+ dtype: sfp4_e2m1_all
50
+ zero_point: null
51
+ group_shapes:
52
+ - - -1
53
+ - -1
54
+ - -1
55
+ - - 1
56
+ - 16
57
+ - 1
58
+ - 1
59
+ - 1
60
+ scale_dtypes:
61
+ - null
62
+ - sfp8_e4m3_nan
63
+ enable_kernel_gptq: false
64
+ enable_low_rank: true
65
+ low_rank:
66
+ rank: 32
67
+ exclusive: false
68
+ compensate: false
69
+ degree: 2
70
+ objective: OutputsError
71
+ strategy: Manual
72
+ sample_batch_size: 16
73
+ sample_size: -1
74
+ outputs_device: cpu
75
+ num_iters: 100
76
+ early_stop: true
77
+ skips:
78
+ - down_resblock_conv
79
+ - down_transformer_add_norm
80
+ - down_transformer_norm
81
+ - mid_resblock_conv
82
+ - mid_transformer_add_norm
83
+ - mid_transformer_norm
84
+ - transformer_add_norm
85
+ - transformer_norm
86
+ - up_resblock_conv
87
+ - up_transformer_add_norm
88
+ - up_transformer_norm
89
+ enable_calib_range: true
90
+ calib_range:
91
+ degree: 2
92
+ objective: OutputsError
93
+ strategy: Manual
94
+ granularity: Layer
95
+ element_batch_size: -1
96
+ sample_batch_size: 16
97
+ element_size: -1
98
+ sample_size: -1
99
+ pre_reshape: true
100
+ outputs_device: cpu
101
+ ratio: 1.0
102
+ max_shrink: 0.2
103
+ max_expand: 1.0
104
+ num_grids: 80
105
+ allow_scale: false
106
+ skips: []
107
+ skips:
108
+ - down_resblock_shortcut
109
+ - down_resblock_time_proj
110
+ - down_sample
111
+ - down_transformer_proj_in
112
+ - down_transformer_proj_out
113
+ - input_embed
114
+ - mid_resblock_shortcut
115
+ - mid_resblock_time_proj
116
+ - mid_transformer_proj_in
117
+ - mid_transformer_proj_out
118
+ - output_embed
119
+ - text_embed
120
+ - time_embed
121
+ - up_resblock_shortcut
122
+ - up_resblock_time_proj
123
+ - up_sample
124
+ - up_transformer_proj_in
125
+ - up_transformer_proj_out
126
+ ipts:
127
+ dtype: sfp4_e2m1_all
128
+ zero_point: null
129
+ group_shapes:
130
+ - - 1
131
+ - 16
132
+ - 1
133
+ - 1
134
+ - 1
135
+ scale_dtypes:
136
+ - sfp8_e4m3_nan
137
+ static: false
138
+ enable_calib_range: false
139
+ skips:
140
+ - down_resblock_shortcut
141
+ - down_resblock_time_proj
142
+ - down_sample
143
+ - down_transformer_add_norm
144
+ - down_transformer_norm
145
+ - down_transformer_proj_in
146
+ - down_transformer_proj_out
147
+ - input_embed
148
+ - mid_resblock_shortcut
149
+ - mid_resblock_time_proj
150
+ - mid_transformer_add_norm
151
+ - mid_transformer_norm
152
+ - mid_transformer_proj_in
153
+ - mid_transformer_proj_out
154
+ - output_embed
155
+ - text_embed
156
+ - time_embed
157
+ - transformer_add_norm
158
+ - transformer_norm
159
+ - up_resblock_shortcut
160
+ - up_resblock_time_proj
161
+ - up_sample
162
+ - up_transformer_add_norm
163
+ - up_transformer_norm
164
+ - up_transformer_proj_in
165
+ - up_transformer_proj_out
166
+ allow_unsigned: false
167
+ opts:
168
+ dtype: null
169
+ zero_point: null
170
+ group_shapes:
171
+ - - -1
172
+ - -1
173
+ - -1
174
+ scale_dtypes:
175
+ - null
176
+ static: false
177
+ enable_calib_range: false
178
+ skips: []
179
+ allow_unsigned: false
180
+ enable_extra_wgts: true
181
+ extra_wgts:
182
+ dtype: sint4
183
+ zero_point: null
184
+ group_shapes:
185
+ - - 1
186
+ - 64
187
+ - 1
188
+ - 1
189
+ - 1
190
+ scale_dtypes:
191
+ - null
192
+ includes:
193
+ - down_transformer_add_norm
194
+ - down_transformer_norm
195
+ - mid_transformer_add_norm
196
+ - mid_transformer_norm
197
+ - transformer_add_norm
198
+ - transformer_norm
199
+ - up_transformer_add_norm
200
+ - up_transformer_norm
201
+ calib:
202
+ data: qdiff
203
+ num_samples: 128
204
+ batch_size: 16
205
+ path: /data/pingzhi/deepcompressor/examples/diffusion/datasets/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5/qdiff/s128
206
+ num_workers: 8
207
+ enable_rotation: false
208
+ enable_smooth: true
209
+ smooth:
210
+ enable_proj: true
211
+ proj:
212
+ degree: 2
213
+ objective: OutputsError
214
+ strategy: GridSearch
215
+ granularity: Layer
216
+ element_batch_size: -1
217
+ sample_batch_size: 16
218
+ element_size: -1
219
+ sample_size: -1
220
+ pre_reshape: true
221
+ outputs_device: cpu
222
+ fuse_when_possible: false
223
+ allow_a_quant: true
224
+ allow_b_quant: true
225
+ spans:
226
+ - - AbsMax
227
+ - AbsMax
228
+ alpha: 0.5
229
+ beta: -2
230
+ num_grids: 20
231
+ allow_low_rank: true
232
+ skips:
233
+ - down_resblock_conv
234
+ - down_transformer_add_norm
235
+ - down_transformer_norm
236
+ - mid_resblock_conv
237
+ - mid_transformer_add_norm
238
+ - mid_transformer_norm
239
+ - transformer_add_norm
240
+ - transformer_norm
241
+ - up_resblock_conv
242
+ - up_transformer_add_norm
243
+ - up_transformer_norm
244
+ enable_attn: false
245
+ develop_dtype: torch.float32
246
+ enable_text: false
247
+ text_cache:
248
+ root: ''
249
+ path:
250
+ rotation: ''
251
+ reorder: ''
252
+ smooth: ''
253
+ wgts: ''
254
+ acts: ''
255
+ seed: 12345
256
+ skip_gen: true
257
+ skip_eval: true
258
+ load_from: ''
259
+ save_model: 'true'
260
+ copy_on_save: false
flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/branch.pt ADDED
@@ -0,0 +1 @@
 
 
1
+ ../../../../../../../../../cache/quant/qdiff.128/branch/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]-w.include.[tan+tn]/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/lowrank.OutputsError.Manual.Layer.d2.en1.sn1/lowrank.i100.r32.earlystop/lowrank.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt
flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc5cd04b5dbc2542d524d8725ef4db10184f2cb7a68c391a421ea141e96dd3fb
3
+ size 23803297253
flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/scale.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa6048e14f91974ce019fe63b48320b221b3682f3617e402277def100fdb8782
3
+ size 2353874427
flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/smooth.pt ADDED
@@ -0,0 +1 @@
 
 
1
+ ../../../../../../../../../cache/quant/qdiff.128/smooth/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]-w.include.[tan+tn]/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt
flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/wgts.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:836f77a150a2e5e4ad6f51b65cb1f93b6bf98a16cdf9ce9cd19652a1e102d3dc
3
+ size 39771
flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/run-250831.030550.log ADDED
The diff for this file is too large to render. See raw diff
 
flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/config-250828.070127.yaml ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ enable_cache: true
2
+ cache:
3
+ root: runs
4
+ output:
5
+ root: runs
6
+ dirname: shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000
7
+ job: run
8
+ pipeline:
9
+ name: flux.1-dev-ghibli
10
+ path: /home/pingzhi/checkpoints/fused_flux_dev_studio_ghibli_bf16
11
+ dtype: torch.bfloat16
12
+ device: cuda
13
+ shift_activations: true
14
+ enable_lora: false
15
+ task: text-to-image
16
+ eval:
17
+ protocol: fmeuler50-g3.5
18
+ num_gpus: 1
19
+ batch_size: 1
20
+ batch_size_per_gpu: 1
21
+ height: null
22
+ width: null
23
+ clean_caption: null
24
+ num_steps: 50
25
+ guidance_scale: 3.5
26
+ num_samples: 5000
27
+ benchmarks:
28
+ - MJHQ
29
+ - DCI
30
+ gt_metrics:
31
+ - clip_iqa
32
+ - clip_score
33
+ - image_reward
34
+ - fid
35
+ ref_metrics:
36
+ - psnr
37
+ - lpips
38
+ - ssim
39
+ - fid
40
+ gen_root: '{output}/{job}'
41
+ ref_root: /data/pingzhi/deepcompressor/examples/diffusion/baselines/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5
42
+ gt_stats_root: benchmarks/stats
43
+ control_root: benchmarks
44
+ chunk_start: 0
45
+ chunk_step: 1
46
+ chunk_only: false
47
+ quant:
48
+ wgts:
49
+ dtype: sint4
50
+ zero_point: null
51
+ group_shapes:
52
+ - - 1
53
+ - 64
54
+ - 1
55
+ - 1
56
+ - 1
57
+ scale_dtypes:
58
+ - null
59
+ enable_kernel_gptq: false
60
+ enable_low_rank: true
61
+ low_rank:
62
+ rank: 32
63
+ exclusive: false
64
+ compensate: false
65
+ degree: 2
66
+ objective: OutputsError
67
+ strategy: Manual
68
+ sample_batch_size: 16
69
+ sample_size: -1
70
+ outputs_device: cpu
71
+ num_iters: 100
72
+ early_stop: true
73
+ skips:
74
+ - down_resblock_conv
75
+ - down_transformer_add_norm
76
+ - down_transformer_norm
77
+ - mid_resblock_conv
78
+ - mid_transformer_add_norm
79
+ - mid_transformer_norm
80
+ - transformer_add_norm
81
+ - transformer_norm
82
+ - up_resblock_conv
83
+ - up_transformer_add_norm
84
+ - up_transformer_norm
85
+ enable_calib_range: true
86
+ calib_range:
87
+ degree: 2
88
+ objective: OutputsError
89
+ strategy: Manual
90
+ granularity: Layer
91
+ element_batch_size: -1
92
+ sample_batch_size: 16
93
+ element_size: -1
94
+ sample_size: -1
95
+ pre_reshape: true
96
+ outputs_device: cpu
97
+ ratio: 1.0
98
+ max_shrink: 0.2
99
+ max_expand: 1.0
100
+ num_grids: 80
101
+ allow_scale: false
102
+ skips: []
103
+ skips:
104
+ - down_resblock_shortcut
105
+ - down_resblock_time_proj
106
+ - down_sample
107
+ - down_transformer_proj_in
108
+ - down_transformer_proj_out
109
+ - input_embed
110
+ - mid_resblock_shortcut
111
+ - mid_resblock_time_proj
112
+ - mid_transformer_proj_in
113
+ - mid_transformer_proj_out
114
+ - output_embed
115
+ - text_embed
116
+ - time_embed
117
+ - up_resblock_shortcut
118
+ - up_resblock_time_proj
119
+ - up_sample
120
+ - up_transformer_proj_in
121
+ - up_transformer_proj_out
122
+ ipts:
123
+ dtype: sint4
124
+ zero_point: null
125
+ group_shapes:
126
+ - - 1
127
+ - 64
128
+ - 1
129
+ - 1
130
+ - 1
131
+ scale_dtypes:
132
+ - null
133
+ static: false
134
+ enable_calib_range: false
135
+ skips:
136
+ - down_resblock_shortcut
137
+ - down_resblock_time_proj
138
+ - down_sample
139
+ - down_transformer_add_norm
140
+ - down_transformer_norm
141
+ - down_transformer_proj_in
142
+ - down_transformer_proj_out
143
+ - input_embed
144
+ - mid_resblock_shortcut
145
+ - mid_resblock_time_proj
146
+ - mid_transformer_add_norm
147
+ - mid_transformer_norm
148
+ - mid_transformer_proj_in
149
+ - mid_transformer_proj_out
150
+ - output_embed
151
+ - text_embed
152
+ - time_embed
153
+ - transformer_add_norm
154
+ - transformer_norm
155
+ - up_resblock_shortcut
156
+ - up_resblock_time_proj
157
+ - up_sample
158
+ - up_transformer_add_norm
159
+ - up_transformer_norm
160
+ - up_transformer_proj_in
161
+ - up_transformer_proj_out
162
+ allow_unsigned: true
163
+ opts:
164
+ dtype: null
165
+ zero_point: null
166
+ group_shapes:
167
+ - - -1
168
+ - -1
169
+ - -1
170
+ scale_dtypes:
171
+ - null
172
+ static: false
173
+ enable_calib_range: false
174
+ skips: []
175
+ allow_unsigned: false
176
+ enable_extra_wgts: false
177
+ calib:
178
+ data: qdiff
179
+ num_samples: 128
180
+ batch_size: 16
181
+ path: /data/pingzhi/deepcompressor/examples/diffusion/datasets/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5/qdiff/s128
182
+ num_workers: 8
183
+ enable_rotation: false
184
+ enable_smooth: true
185
+ smooth:
186
+ enable_proj: true
187
+ proj:
188
+ degree: 2
189
+ objective: OutputsError
190
+ strategy: GridSearch
191
+ granularity: Layer
192
+ element_batch_size: -1
193
+ sample_batch_size: 16
194
+ element_size: -1
195
+ sample_size: -1
196
+ pre_reshape: true
197
+ outputs_device: cpu
198
+ fuse_when_possible: false
199
+ allow_a_quant: true
200
+ allow_b_quant: true
201
+ spans:
202
+ - - AbsMax
203
+ - AbsMax
204
+ alpha: 0.5
205
+ beta: -2
206
+ num_grids: 20
207
+ allow_low_rank: true
208
+ skips:
209
+ - down_resblock_conv
210
+ - down_transformer_add_norm
211
+ - down_transformer_norm
212
+ - mid_resblock_conv
213
+ - mid_transformer_add_norm
214
+ - mid_transformer_norm
215
+ - transformer_add_norm
216
+ - transformer_norm
217
+ - up_resblock_conv
218
+ - up_transformer_add_norm
219
+ - up_transformer_norm
220
+ enable_attn: false
221
+ develop_dtype: torch.float32
222
+ enable_text: false
223
+ text_cache:
224
+ root: ''
225
+ path:
226
+ rotation: ''
227
+ reorder: ''
228
+ smooth: ''
229
+ wgts: ''
230
+ acts: ''
231
+ seed: 12345
232
+ skip_gen: true
233
+ skip_eval: true
234
+ load_from: ''
235
+ save_model: 'true'
236
+ copy_on_save: false
flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/branch.pt ADDED
@@ -0,0 +1 @@
 
 
1
+ ../../../../../../../../../cache/quant/qdiff.128/branch/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]/shift/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/lowrank.OutputsError.Manual.Layer.d2.en1.sn1/lowrank.i100.r32.earlystop/lowrank.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt
flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58385172e212ea5f720f78eb346eeff8554f6d8b778cd88ab13b2a0242cc7d8b
3
+ size 23803329729
flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/scale.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa78f32c795f98d625d13a237b06f44d50e067327633bbadd7f5fbe6096fb45f
3
+ size 739952787
flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/smooth.pt ADDED
@@ -0,0 +1 @@
 
 
1
+ ../../../../../../../../../cache/quant/qdiff.128/smooth/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]/shift/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt
flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/wgts.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0780a44cf225507e60db7557da5e1665de9bcc7034b33dff598af18bbb06bda7
3
+ size 40347
flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/run-250828.070127.log ADDED
The diff for this file is too large to render. See raw diff